Example #1
 def assertDataAlmostEqual(self, data, reference_filename, **kwargs):
     reference_path = self.get_result_path(reference_filename)
     if self._check_reference_file(reference_path):
         kwargs.setdefault('err_msg', 'Reference file %s' % reference_path)
         with open(reference_path, 'r') as reference_file:
             stats = json.load(reference_file)
             self.assertEqual(stats.get('shape', []), list(data.shape))
             self.assertEqual(stats.get('masked', False),
                              ma.is_masked(data))
             nstats = np.array((stats.get('mean', 0.), stats.get('std', 0.),
                                stats.get('max', 0.), stats.get('min', 0.)),
                               dtype=np.float_)
             if math.isnan(stats.get('mean', 0.)):
                 self.assertTrue(math.isnan(data.mean()))
             else:
                 data_stats = np.array((data.mean(), data.std(),
                                        data.max(), data.min()),
                                       dtype=np.float_)
                 self.assertArrayAllClose(nstats, data_stats, **kwargs)
     else:
         self._ensure_folder(reference_path)
         stats = collections.OrderedDict([
             ('std', np.float_(data.std())),
             ('min', np.float_(data.min())),
             ('max', np.float_(data.max())),
             ('shape', data.shape),
             ('masked', ma.is_masked(data)),
             ('mean', np.float_(data.mean()))])
         with open(reference_path, 'w') as reference_file:
             reference_file.write(json.dumps(stats))
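A minimal, self-contained sketch (using only numpy and the standard json module, not the test class above) of the round trip this helper relies on: summary statistics of a masked array are dumped to JSON and later compared against freshly computed values.

import json
import numpy as np
import numpy.ma as ma

data = ma.masked_invalid(np.array([1.0, 2.0, np.nan, 4.0]))
stats = {'shape': list(data.shape), 'masked': ma.is_masked(data),
         'mean': float(data.mean()), 'std': float(data.std()),
         'max': float(data.max()), 'min': float(data.min())}
restored = json.loads(json.dumps(stats))
assert restored['masked'] is True
np.testing.assert_allclose([restored['mean'], restored['max']],
                           [data.mean(), data.max()])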
Example #2
def sam2mat_main(args):
    region_pattern = r'^[^:]+(?::\d+-\d+)?(?:,[^:]+(?::\d+-\d+)?)?$'
    if args.region is not None and re.search(region_pattern, args.region):
        regions = args.region
    elif args.reglist is not None:
        with open(args.reglist) as f:
            regions = [line.rstrip() for line in f]
    else:
        regions = None

    if args.insam is None:
        sam_fh = sys.stdin
    else:
        sam_fh = open(args.insam, 'r')

    bdata = BinnedData(args.fai, regions=regions, resolution=args.resolution)
    bdata.read_sam(sam_fh)
    sam_fh.close()

    if args.clean:
        bdata.clean()
    if args.ice:
        bdata.iterative_correction()

    margins = bdata.dat.sum(axis=0)
    #print(margins)
    #sys.exit()

    try:
        os.makedirs(args.outdir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
    bin_outfile = os.path.join(args.outdir, 'bins.txt.gz')
    contact_outfile = os.path.join(args.outdir, 'contacts.txt.gz')
    matrix_outfile = os.path.join(args.outdir, 'matrix.txt.gz')
    # Open in text mode ('wt'): print(..., file=...) writes str, which a
    # binary-mode gzip handle would reject in Python 3.
    bin_f = gzip.open(bin_outfile, 'wt')
    contact_f = gzip.open(contact_outfile, 'wt')
    matrix_f = gzip.open(matrix_outfile, 'wt')

    for i,chrom1,b1 in bdata.iter_bins():
        bin_mid1 = (b1[0]+b1[1])/2
        if ma.is_masked(margins[i]):
            margin = 0
        else:
            margin = int(margins[i])
        print('{}\t{}\t{}\t{}\t{}'.format(chrom1,0,bin_mid1,margin,int(margin>0)), file=bin_f)
        if bdata.cleaned:
            print('\t'.join(bdata.dat.data[i].astype(str)), file=matrix_f)
        else:
            print('\t'.join(bdata.dat[i].astype(str)), file=matrix_f)
        for j,chrom2,b2 in bdata.iter_bins():
            bin_mid2 = (b2[0]+b2[1])/2
            contact = bdata.dat[i,j]
            if j>i and not ma.is_masked(contact) and contact > 0:
                print('{}\t{}\t{}\t{}\t{}'.format(chrom1,bin_mid1,chrom2,bin_mid2,int(contact)), file=contact_f)

    bin_f.close()
    contact_f.close()
    matrix_f.close()
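A hypothetical stand-alone check (the temporary path is an assumption, not part of the script above) that text-mode gzip handles accept print() output, while binary-mode handles require bytes.

import gzip

with gzip.open('/tmp/bins_demo.txt.gz', 'wt') as fh:
    print('chr1\t0\t500\t42\t1', file=fh)          # str is fine in 'wt' mode
with gzip.open('/tmp/bins_demo.txt.gz', 'rt') as fh:
    assert fh.readline().startswith('chr1')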
Example #3
def test_ephemerides_query(patch_request):
    # check values of Ceres for a given epoch
    # orbital uncertainty of Ceres is basically zero
    res = jplhorizons.Horizons(id='Ceres', location='500',
                               epochs=2451545.5).ephemerides()[0]

    assert res['targetname'] == "1 Ceres"
    assert res['datetime_str'] == "2000-Jan-01 00:00:00.000"
    assert res['solar_presence'] == ""
    assert res['flags'] == ""
    assert res['elongFlag'] == '/L'

    assert is_masked(res['AZ'])
    assert is_masked(res['EL'])
    assert is_masked(res['airmass'])
    assert is_masked(res['magextinct'])

    npt.assert_allclose(
        [2451544.5,
         188.70280, 9.09829, 34.40955, -2.68358,
         8.27, 6.83, 96.171,
         161.3828, 10.4528, 2.551099014238, 0.1744491,
         2.26315116146176, -21.9390511, 18.822054,
         95.3996, 22.5698, 292.551, 296.850,
         184.3426220, 11.7996521, 289.864329, 71.545655,
         0, 0],
        [res['datetime_jd'],
         res['RA'], res['DEC'], res['RA_rate'], res['DEC_rate'],
         res['V'], res['surfbright'], res['illumination'],
         res['EclLon'], res['EclLat'], res['r'], res['r_rate'],
         res['delta'], res['delta_rate'], res['lighttime'],
         res['elong'], res['alpha'], res['sunTargetPA'], res['velocityPA'],
         res['ObsEclLon'], res['ObsEclLat'], res['GlxLon'], res['GlxLat'],
         res['RA_3sigma'], res['DEC_3sigma']])
Example #4
 def test_addTraceWithGap(self):
     """
     Tests __add__ method of the Trace class.
     """
     # set up
     tr1 = Trace(data=np.arange(1000))
     tr1.stats.sampling_rate = 200
     start = UTCDateTime(2000, 1, 1, 0, 0, 0, 0)
     tr1.stats.starttime = start
     tr2 = Trace(data=np.arange(0, 1000)[::-1])
     tr2.stats.sampling_rate = 200
     tr2.stats.starttime = start + 10
     # verify
     tr1.verify()
     tr2.verify()
     # add
     trace = tr1 + tr2
     # stats
     self.assertEquals(trace.stats.starttime, start)
     self.assertEquals(trace.stats.endtime, start + 14.995)
     self.assertEquals(trace.stats.sampling_rate, 200)
     self.assertEquals(trace.stats.npts, 3000)
     # data
     self.assertEquals(len(trace), 3000)
     self.assertEquals(trace[0], 0)
     self.assertEquals(trace[999], 999)
     self.assertTrue(is_masked(trace[1000]))
     self.assertTrue(is_masked(trace[1999]))
     self.assertEquals(trace[2000], 999)
     self.assertEquals(trace[2999], 0)
     # verify
     trace.verify()
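A plain numpy.ma sketch (independent of ObsPy) of the gap behaviour this test checks: samples inside the gap are masked, so ma.is_masked() is True for them individually while the samples on either side keep their values.

import numpy as np
import numpy.ma as ma

trace = ma.concatenate([np.arange(1000),
                        ma.masked_all(1000, dtype=int),
                        np.arange(1000)[::-1]])
assert len(trace) == 3000
assert trace[999] == 999 and trace[2000] == 999
assert ma.is_masked(trace[1000]) and ma.is_masked(trace[1999])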
Example #5
def ham6_nearest(pixel, palette, last_color=None):
    if pixel is None or ma.is_masked(pixel):
        return ma.masked, ma.masked

    min_dist = None
    best_index = ma.masked
    best_color = ma.masked

    for i, c in enumerate(palette[:16]):
        d = color_distance(pixel, c)
        if min_dist is None or d < min_dist:
            if d == 0:
                return i, c
            min_dist = d
            best_index = i
            best_color = c

    if last_color is None or ma.is_masked(last_color):
        return best_index, best_color

    c = last_color.copy()

    for i in range(16):
        c[2] = i * 0x11
        d = color_distance(pixel, c)
        if d < min_dist:
            if d == 0:
                return i + 0x10, c
            min_dist = d
            best_index = i + 0x10
            best_color = c.copy()

    c = last_color.copy()

    for i in range(16):
        c[0] = i * 0x11
        d = color_distance(pixel, c)
        if d < min_dist:
            if d == 0:
                return i + 0x20, c
            min_dist = d
            best_index = i + 0x20
            best_color = c.copy()

    c = last_color.copy()

    for i in range(16):
        c[1] = i * 0x11
        d = color_distance(pixel, c)
        if d < min_dist:
            if d == 0:
                return i + 0x30, c
            min_dist = d
            best_index = i + 0x30
            best_color = c.copy()

    return best_index, best_color
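A hedged usage sketch for ham6_nearest: the function depends on a color_distance() helper that is not shown in this listing, so a plausible stand-in (squared Euclidean RGB distance) is defined here, together with a simple 16-entry greyscale palette.

import numpy as np
import numpy.ma as ma

def color_distance(a, b):                      # assumed helper, not the original
    d = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    return float(np.dot(d, d))

palette = np.array([[i * 0x11] * 3 for i in range(16)], dtype=np.uint8)
index, color = ham6_nearest(np.array([0x22, 0x22, 0x22]), palette)
assert index == 2                              # exact palette hit
assert ma.is_masked(ham6_nearest(ma.masked, palette)[0])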
Example #6
    def add_average_field(self,
                          field_to_avg,
                          average_func=np.ma.mean,
                          valid_range=None #[20, 300]
                          ):
        """
        Will run an function over the elements of a field to reduce them
        to a single metric for each element, and add this reduced data (e.g.
        the mean value) as a new field to the DataMat.
        
        Obvious example is to compute the average pupil size in a single trial.
        
        Will honor span start and end indices if they are not masked. 
        
        Parameters:
         field_to_avg : string
             the name of the field to process
         average_func : function pointer
             a pointer to the function to use for each element
         valid_range : 2-element sequence (tuple or list)
             if not None, then minimum and maximum dictating the
             range, outside of which the data will be ignored. The data outside
             this range will be masked prior to the averaging.
        
        """
        for fieldname in [field_to_avg]:
            if fieldname not in self.fieldnames():
                raise ValueError("Required field '%s' not in Datamat." % (
                            fieldname))
        avg = []
        for dmi in self:
            dat = dmi.field(field_to_avg)[0]
            if dat is not None:
                sidx = dmi.span_start_idx[0]
                if ma.is_masked(sidx):
                    sidx = 0
                eidx = dmi.span_end_idx[0]
                if ma.is_masked(eidx):
                    eidx = -1
                spandat = dat[sidx:eidx]
                if valid_range is not None:
                    valdat = spandat[(spandat > valid_range[0]) & (spandat < valid_range[1])]
                else:
                    valdat = spandat
                datavg = average_func(valdat) if len(valdat) > 0 else np.NaN
                avg.append(datavg)
            else:
                avg.append(np.NaN)

        avg = ma.masked_invalid(avg)
        avg.fill_value = np.NaN
        fname = get_short_function_name(average_func)
        new_field = (fname) + "_" + field_to_avg

        self.add_field(new_field, avg)
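The core masking-and-averaging step of add_average_field in isolation (no Datamat instance needed): restrict a span to the valid range, average it, and mask results that came out as NaN.

import numpy as np
import numpy.ma as ma

spandat = np.array([15., 80., 120., 400.])
valid_range = (20, 300)
valdat = spandat[(spandat > valid_range[0]) & (spandat < valid_range[1])]
avg = ma.masked_invalid([valdat.mean() if len(valdat) > 0 else np.nan])
print(avg)          # [100.0]; entries that averaged to NaN would be masked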
Example #7
def _next_non_masked_element(a, idx):
    """Return the next non masked element of a masked array.

    If an array is masked, return the next non-masked element (if the given index is masked).
    If no other unmasked points are after the given masked point, returns none.

    Parameters
    ----------
    a : array-like
        1-dimensional array of numeric values
    idx : integer
        index of requested element

    Returns
    -------
        Index of next non-masked element and next non-masked element

    """
    try:
        next_idx = idx + a[idx:].mask.argmin()
        if ma.is_masked(a[next_idx]):
            return None, None
        else:
            return next_idx, a[next_idx]
    except (AttributeError, TypeError, IndexError):
        return idx, a[idx]
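Usage sketch for _next_non_masked_element on a small masked array.

import numpy.ma as ma

a = ma.masked_array([1, 2, 3, 4], mask=[False, True, True, False])
print(_next_non_masked_element(a, 1))   # (3, 4): first unmasked index >= 1
print(_next_non_masked_element(a, 0))   # (0, 1): index 0 is already unmasked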
Example #8
def array_masked_to_nans(array):
    """
    Convert a masked array to a NumPy `ndarray` filled with NaN values. Input
    NumPy arrays with no mask are returned unchanged.
    This is used for dask integration, as dask does not support masked arrays.

    Args:

    * array:
        A NumPy `ndarray` or masked array.

    Returns:
        A NumPy `ndarray`. This is the input array if unmasked, or an array
        of floating-point values with NaN values where the mask was `True` if
        the input array is masked.

    .. note::
        The fill value and mask of the input masked array will be lost.

    .. note::
        Integer masked arrays are cast to 8-byte floats because NaN is a
        floating-point value.

    """
    if not ma.isMaskedArray(array):
        result = array
    else:
        if ma.is_masked(array):
            mask = array.mask
            new_dtype = nan_array_type(array.data.dtype)
            result = array.data.astype(new_dtype)
            result[mask] = np.nan
        else:
            result = array.data
    return result
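A behaviour sketch for array_masked_to_nans. The nan_array_type() helper it calls is not shown in this listing; for float input the conversion amounts to filling masked positions with NaN, while plain ndarrays pass straight through.

import numpy as np
import numpy.ma as ma

masked = ma.masked_array([1.0, 2.0, 3.0], mask=[False, True, False])
filled = masked.data.copy()
filled[masked.mask] = np.nan
print(filled)                                # [ 1. nan  3.]
print(array_masked_to_nans(np.arange(3.)))   # unmasked input: returned as-is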
Example #9
def _math_op_common(cube, operation_function, new_unit, new_dtype=None,
                    in_place=False):
    _assert_is_cube(cube)

    if in_place:
        new_cube = cube
        if cube.has_lazy_data():
            new_cube.data = operation_function(cube.lazy_data())
        else:
            try:
                operation_function(cube.data, out=cube.data)
            except TypeError:
                # Non ufunc function
                operation_function(cube.data)
    else:
        new_cube = cube.copy(data=operation_function(cube.core_data()))

    # If the result of the operation is scalar and masked, we need to fix up
    # the dtype
    if new_dtype is not None \
            and not new_cube.has_lazy_data() \
            and new_cube.data.shape == () \
            and ma.is_masked(new_cube.data):
        new_cube.data = ma.masked_array(0, 1, dtype=new_dtype)

    iris.analysis.clear_phenomenon_identity(new_cube)
    new_cube.units = new_unit
    return new_cube
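The scalar fix-up above in isolation: ma.masked_array(0, 1, dtype=...) builds a fully masked 0-d array that still carries the requested dtype.

import numpy as np
import numpy.ma as ma

scalar = ma.masked_array(0, 1, dtype=np.float32)
print(ma.is_masked(scalar), scalar.dtype)    # True float32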
Example #10
def from_ham6(ham6, palette, background=None):
    if background is None:
        background = ma.masked
    elif isinstance(background, numbers.Integral):
        background = palette[background]

    if ma.is_masked(background) or ma.isMaskedArray(ham6):
        rgb8 = ma.empty(ham6.shape[:2] + (3,), dtype=np.uint8)
    else:
        rgb8 = np.empty(ham6.shape[:2] + (3,), dtype=np.uint8)

    for y in range(rgb8.shape[0]):
        c = background
        for x in range(rgb8.shape[1]):
            i = ham6[y, x]
            if i is ma.masked:
                # Propagate the mask to the output pixel.
                rgb8[y, x] = ma.masked
                continue

            if i < 0x10:
                c = palette[i]
            else:
                c = c.copy()
                c[(None, 2, 0, 1)[i >> 4]] = (i & 0xF) * 0x11

            rgb8[y, x] = c

    return rgb8
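Hedged usage sketch for from_ham6 with a simple 16-entry greyscale palette (an assumption, not the project's real palette): index 0x05 selects palette colour 5, and index 0x1F then modifies only the blue channel of the running colour.

import numpy as np

palette = np.array([[i * 0x11] * 3 for i in range(16)], dtype=np.uint8)
ham6 = np.array([[0x05, 0x1F]], dtype=np.uint8)
rgb8 = from_ham6(ham6, palette)
print(rgb8[0, 0], rgb8[0, 1])    # [85 85 85] [ 85  85 255]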
Example #11
def to_ham6(img, palette, background=None, out=None):
    _debug_array(img)

    if background is None:
        background = ma.masked
    elif isinstance(background, numbers.Integral):
        background = palette[background]

    if not ma.is_masked(background) and ma.isMaskedArray(img):
        img = img.filled(background)

    if ma.isMaskedArray(img):
        ham6 = ma.empty(img.shape[:2], dtype=np.uint8)
    else:
        ham6 = np.empty(img.shape[:2], dtype=np.uint8)

    for y in range(img.shape[0]):
        c = background
        for x in range(img.shape[1]):
            i, c = ham6_nearest(img[y, x], palette, c)
            ham6[y, x] = i
            if out is not None:
                out[y, x] = c

    _debug_array(ham6)
    return ham6
Example #12
    def make_gene_map_2(self):
        """
        The method that takes the attributes from the array and uses
        them to create a gene map for the array.
        The gene map is a dictionary which has a binary string as a key.
        The binary string is created by creating a binary bit string of 
        an appropriate length.
        The length is calculated 
        """
        count = 0
        self.iterator_one = itertools.product(range(self.time_len), range(self.lat_len), range(self.lon_len))
        ### Assign a binary string a location and a value from the data
        print "\n"
        print "Creating gene-map dictionary... \n"
        print "Assigning valid locations to binary strings! \n"
        for x_valid in self.iterator_one:
            binary_string = bin(count)[2:]
            while len(binary_string) < self.string_length:  # removed minus one (-1) NB
                binary_string = "0" + binary_string
            self.gene_map[binary_string] = {}
            if ma.is_masked(self.array[x_valid]):
                pass
            else:
                self.gene_map[binary_string]["coordinate"] = tuple(x_valid)
                self.gene_map[binary_string]["value"] = self.array[x_valid]
                self.location_dict[x_valid[1:3]] = []
                self.location_dict_stdevs[x_valid[1:3]] = 0
                count += 1
        self.last_valid_binary_string = binary_string
        binary_string_old = binary_string
        not_valid_first = int(binary_string, 2) + 1
        not_valid_last = int("1" * (self.string_length), 2)  # added minus one just for nonmasked version NB
        self.count = count

        if self.count == self.count_non_masked:
            print "The counter corresponds with the non-masked count! \n"
        ### Pad the dictionary to give binary strings some value
        print "Assigning left over binary strings to non-existant locations! \n"
        self.iterator_two = itertools.product(range(self.time_len), range(self.lat_len), range(self.lon_len))
        # ~ for x_not_valid in range(not_valid_first, not_valid_last+1):
        count_2 = not_valid_first
        for x_not_valid in self.iterator_two:
            # ~ binary_string = bin(x_not_valid)[2:] # DOES IT NEED TO BE PADDED
            binary_string = bin(count_2)[2:]
            while len(binary_string) < self.string_length:  # removed minus one (-1) NB
                binary_string = "0" + binary_string
            self.gene_map[binary_string] = {}
            self.gene_map[binary_string]["coordinate"] = (999, 999, 999)  # x_not_valid
            self.gene_map[binary_string]["value"] = 1e09  # self.array[x_valid]
            if count_2 == not_valid_last:
                break
            else:
                count_2 += 1
        print "There are %d valid locations. \n" % count
        print "The last binary string is: ", binary_string
        print "The last binary string assigned to a valid locations is :", binary_string_old
        print "The length of binary string is: ", self.string_length
        print "The non-valid locations fall between %d and %d. \n" % (not_valid_first, not_valid_last)
        print "Is the array masked?: \n", ma.isMA(self.array)
        print "The gene-map has been created! \n"
Example #13
 def err(coef):
     if ma.is_masked(v):
         res = v.flatten() - EllipticGaussian(x,y,coef).flatten()
         return res.data[res.mask==False]
     else:
         res = v.flatten() - EllipticGaussian(x,y,coef).flatten()
         return res
Example #14
 def err(coef):
     if ma.is_masked(v):
         res = v.flatten() - Gaussian1d(x,coef).flatten()
         return res.data[res.mask==False]
     else:
         res = v.flatten() - Gaussian1d(x,coef).flatten()
         return res
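The masked-residual pattern shared by both err() helpers above, in isolation: keep only the residuals where the observation is unmasked.

import numpy as np
import numpy.ma as ma

v = ma.masked_array([1.0, 2.0, 3.0], mask=[False, True, False])
model = np.array([0.5, 2.5, 2.0])
res = v.flatten() - model.flatten()
print(res.data[res.mask == False])    # [0.5 1. ]  (equivalent to res.compressed())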
Example #15
 def createMinMaxList(self):
     """
     Creates the minmax list. The method used is not fully accurate but the results
     should be ok.
     """
     pixel = self.win.detail
     data = self.stream.slice(self.starttime, self.endtime)[0].data 
     # Reshape and calculate point to point differences.
     per_pixel = int(len(data)//pixel)
     ptp = data[:pixel * per_pixel].reshape(pixel, per_pixel).ptp(axis=1)
     # Last pixel.
     last_pixel = data[pixel * per_pixel:]
     if len(last_pixel):
         last_pixel = last_pixel.ptp()
         if ptp[-1] < last_pixel:
             ptp[-1] = last_pixel
     self.ptp = ptp.astype('float32')
     # Create a logarithmic axis.
     if self.win.log_scale:
         self.ptp += 1
         self.ptp = np.log(self.ptp)/np.log(self.win.log_scale)
     # Make it go from 0 to 100.
     self.ptp *= 100.0/self.ptp.max()
     # Set masked arrays to zero.
     if is_masked(self.ptp):
         self.ptp.fill_value = 0.0
         self.ptp = self.ptp.filled()
     # Assure that very small values are also visible. Only true gaps are 0
     # and will stay 0.
     self.ptp[(self.ptp > 0) & (self.ptp < 0.5)] = 0.5
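The peak-to-peak binning step above in isolation (np.ptp is used here instead of the ndarray method, which newer NumPy versions removed): the samples are reshaped into one chunk per pixel and the range of each chunk is taken.

import numpy as np

data = np.arange(23)
pixel = 5
per_pixel = int(len(data) // pixel)                 # 4 samples per pixel
ptp = np.ptp(data[:pixel * per_pixel].reshape(pixel, per_pixel), axis=1)
print(ptp)                                          # [3 3 3 3 3]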
Example #16
def load_ham6(fname, palette, background=None, mtimes=None, format=None):
    stem, __ = os.path.splitext(fname)
    fname_cache = stem + '.cache'

    masked = background is None or ma.is_masked(background)

    if mtimes is None:
        mtimes = ()
    elif not isinstance(mtimes, collections.abc.Iterable):  # collections.Iterable was removed in Python 3.10
        mtimes = (mtimes,)

    try:
        if os.path.exists(fname):
            mtimes = itertools.chain(mtimes, (os.path.getmtime(fname),))

        for mtime in mtimes:
            if os.path.getmtime(fname_cache) < mtime:
                raise OSError("cache file out of date")

        ham6 = np.genfromtxt(fname_cache, dtype=np.uint8,
                             missing_values=masked and '--',
                             usemask=masked, loose=False, invalid_raise=True)
    except OSError:
        rgb = load_image(fname, masked=masked, format=format)
        ham6 = to_ham6(rgb, palette, background=background)
        out = np.array(ham6, dtype=np.str_)
        if masked:
            out[ma.getmaskarray(ham6)] = '--'
        np.savetxt(fname_cache, out, fmt='%2s', delimiter=' ')

    return ham6
Example #17
 def __call__(self,array):
     masked = ma.is_masked(array)
     if self.method == 'basemap':  # compare strings with ==, not 'is'
        return basemap.interp(array, self.xin, self.yin, self.xout, self.yout, checkbounds=False, masked=masked, order=1)
     elif self.method == 'scipy':
        import scipy.interpolate
        interp = scipy.interpolate.interp2d(self.xin, self.yin, array, kind='linear')
        a1d = interp(self.xout[0,:],self.yout[:,0])
        return npy.reshape(a1d,self.yout.shape)
Example #18
 def feature_from_tile(self,tile,out):
     if ma.is_masked(tile):
         tile = tile.compressed()
         
     tile = tile.reshape(-1)
     
     out[:] = np.bincount(tile,minlength=self.bin_count)
     
     out/=np.sum(out)
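The normalised-histogram feature above in isolation: compressed() drops masked pixels before counting, then the counts are normalised to sum to one.

import numpy as np
import numpy.ma as ma

tile = ma.masked_array([0, 1, 1, 2, 2], mask=[False, False, False, True, False])
counts = np.bincount(tile.compressed(), minlength=4).astype(float)
print(counts / counts.sum())    # [0.25 0.5  0.25 0.  ]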
Example #19
def unwrap_py(inph,in_p=(), uv=2*pi):
    """Return the input matrix unwraped the valu given in uv
    
    The same as unwrapv, but using for-s, written in python
    """
    if not is_masked(inph):
        fasei=MaskedArray(inph, isnan(inph))
    else:
        fasei=inph
        
    nx, ny=(fasei.shape[0],fasei.shape[1]) 
    
    # If the initial unwrapping point is not given, take the center of the image
    # as initial coordinate
    if in_p==():
        in_p=(int(nx/2),int(ny/2))

    # Create a temporary array to mark whether each point has been unwrapped
    # 0 the point has not been unwrapped
    # 1 the point has not been unwrapped, but it is in the unwrapping list 
    # 2 the point was already unwrapped

    fl=zeros((nx, ny))

    # List containing the points to unwrap
    l_un=[in_p]
    fl[in_p]=1

    # unwrapped values
    faseo=fasei.copy()    
    
    while len(l_un)>0:
        # remove the first value from the list
        cx, cy=l_un.pop(0)
    
        # Put the coordinates of the not-yet-visited neighbors in the list
        # and check for wrapping against the already-unwrapped ones
        nv=0
        wv=0    
        
        for i in range(cx-1, cx+2):
            for j in range(cy-1, cy+2):
                if (i>-1) and (i<nx) and (j>-1) and (j<ny):
                    if (fl[i, j]==0)&(faseo.mask[i, j]==False):
                        fl[i, j]=1
                        l_un.append((i, j))
                    elif fl[i, j]==2:
                        wv=wv+rint((faseo[i, j]-faseo[cx, cy])/uv)
                        nv=nv+1        
        if nv!=0: 
            wv=wv/nv

        fl[cx, cy]=2
        faseo[cx, cy]=faseo[cx, cy]+wv*uv
        
    return faseo
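Hedged usage sketch for unwrap_py (assuming the module-level numpy names its source relies on, e.g. MaskedArray, isnan, zeros, rint and pi, are available): a 1 x 5 phase ramp that wraps once is unwrapped so that consecutive differences become constant again, up to a global multiple of 2*pi.

import numpy as np

true_phase = np.linspace(0, 8, 5).reshape(1, 5)          # steps of 2 rad
wrapped = np.mod(true_phase + np.pi, 2 * np.pi) - np.pi  # wrapped into [-pi, pi)
unwrapped = unwrap_py(wrapped)
print(np.allclose(np.diff(unwrapped[0]), 2.0))           # True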
Example #20
    def _numpy_interpolation(self, point_num, eval_points):
        """

        Parameters
        ----------
        point_num: int
            Index of class position in values list
        eval_points: ndarray
            Inputs used to evaluate class member function

        Returns
        -------
        ndarray: output from member function
        """
        is_masked = ma.is_masked(eval_points)

        shape = point_num.shape
        ev_shape = eval_points.shape

        vals = self.values[point_num.ravel()]
        eval_points = np.repeat(eval_points, shape[1], axis=0)
        it = np.arange(eval_points.shape[0])

        it = np.repeat(it, eval_points.shape[1], axis=0)

        eval_points = eval_points.reshape(
            eval_points.shape[0] * eval_points.shape[1],
            eval_points.shape[-1]
        )

        scaled_points = eval_points.T
        if is_masked:
            mask = np.invert(ma.getmask(scaled_points[0]))
        else:
            mask = np.ones_like(scaled_points[0], dtype=bool)

        it = ma.masked_array(it, mask)
        scaled_points[0] = (
            (scaled_points[0] - (self._bounds[0][0])) /
            (self._bounds[0][1] - self._bounds[0][0])
        ) * (vals.shape[-2] - 1)
        scaled_points[1] += (
            (scaled_points[1] - (self._bounds[1][0])) /
            (self._bounds[1][1] - self._bounds[1][0])
        ) * (vals.shape[-1] - 1)
        scaled_points = np.vstack((it, scaled_points))

        output = np.zeros(scaled_points.T.shape[:-1])
        output[mask] = map_coordinates(vals, scaled_points.T[mask].T, order=1)

        new_shape = (*shape, ev_shape[-2])
        output = output.reshape(new_shape)

        return ma.masked_array(output, mask=mask)
Example #21
 def __call__(self, obj, base_encoder):
   if isinstance(obj, np.ndarray):
     if obj.ndim == 1:
       return [base_encoder.default(x) for x in obj]
     else:
       return [base_encoder.default(obj[i]) for i in range(obj.shape[0])]
   if isinstance(obj, np.generic):
     a = np.asscalar(obj)
     if (isinstance(a, float) and np.isnan(a)) or ma.is_masked(a):
       return None
     return a
   return None
Example #22
def ssh2psi(lon,lat,ssh):
    from numpy import pi,sin,ndim,ma
    from pylab import meshgrid
    g = 9.8; #r = 6371.e3
    omega = 0.729e-4
    if ndim(lon)==1:
        lon,lat = meshgrid(lon,lat)
    f=2.0*omega*sin(lat*pi/180.0)
    psi = g/f * ssh
    if ma.is_masked(ssh):
        psi=ma.array(psi,mask=ssh.mask)
    return psi
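A worked check of the geostrophic relation above (note the function's internal `from pylab import meshgrid` requires matplotlib): at 30 degrees latitude f = 2*omega*sin(lat) is about 7.29e-5 1/s, so 10 cm of SSH gives psi = g*ssh/f of roughly 1.34e4 m^2/s.

import numpy as np

psi = ssh2psi(np.array([0.0]), np.array([30.0]), np.array([0.1]))
print(psi)    # approximately [[13443.]]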
Example #23
def kdtree_sample2d(xin, yin, z2d, xout, yout, distance=2.,method='linear'):
    """ bin random data points to grids
    loc_points: 2 x dpoints, lon, lat
    loc_grids: 2 x dgrids, x.ravel, y.ravel
    """
    from scipy import spatial
    from numpy import ma, meshgrid, exp, array, c_, where
    xip,xin  = find_overlap(xin, xout)
    yip,yin = find_overlap(yin, yout)
    z2ds = z2d[where(yip==True)[0],:][:,where(xip==True)[0]]
        
    ismask = ma.is_masked(z2ds)
    
    xin2d,yin2d = meshgrid(xin, yin)
    
    if ismask:
        xin1d, yin1d = xin2d[z2ds.mask==False].ravel(), yin2d[z2ds.mask==False].ravel()
        z1d = z2ds[z2ds.mask==False]
        locs = c_[xin1d, yin1d]
    else:
        xin1d, yin1d = xin2d.ravel(), yin2d.ravel()
        z1d = z2ds.ravel()
        locs = c_[xin1d, yin1d]
    
    tree = spatial.cKDTree(locs)
    
    grids = list(zip(xout, yout))  # materialise for cKDTree.query_ball_point

    index = tree.query_ball_point(grids, distance)

    Tmis=[]
    sample_size=[]
    
    for i in range(xout.size):
        ip = index[i]
        if len(ip) == 0:
            Tmis.append(999999)
            sample_size.append(0)
        else:
            dis = ((xin1d[ip]-xout[i])**2+(yin1d[ip]-yout[i])**2)
            if method=='linear':
                dis = ma.masked_greater(dis**0.5, distance)
                weight = distance - dis**0.5
            else:
                weight = exp(-(dis/distance**2))
            weight = weight/weight.sum()
            Tmis.append((weight*z1d[ip]).sum())
            sample_size.append(len(ip))
            
    zout = ma.masked_greater(array(Tmis),1e5)
    sample_size = ma.masked_equal(array(sample_size),0)
    return zout, sample_size
Example #24
def test_extract_overlimit():
    """ Thest a request over the limits of the database """
    db = WOA()

    t = db['TEMP'].extract(var='t_mn', doy=136.875,
            depth=5502, lat=17.5, lon=-37.5)
    assert ma.is_masked(t['t_mn'])

    t = db['TEMP'].extract(var='t_mn', doy=136.875,
            depth=[10, 5502], lat=17.5, lon=-37.5)
    assert np.all(t['t_mn'].mask == [False, True])
    assert ma.allclose(t['t_mn'],
            ma.masked_array([24.62145996, 0], mask=[False, True]))
Example #25
def inline_data_asarray(inline, dtype=None):
    # np.asarray doesn't handle structured arrays unless the innermost
    # elements are tuples.  To do that, we drill down the first
    # element of each level until we find a single item that
    # successfully converts to a scalar of the expected structured
    # dtype.  Then we go through and convert everything at that level
    # to a tuple.  This probably breaks for nested structured dtypes,
    # but it's probably good enough for now.  It also won't work with
    # object dtypes, but ASDF explicitly excludes those, so we're ok
    # there.
    if dtype is not None and dtype.fields is not None:
        def find_innermost_match(l, depth=0):
            if not isinstance(l, list) or not len(l):
                raise ValueError(
                    "data can not be converted to structured array")
            try:
                np.asarray(tuple(l), dtype=dtype)
            except ValueError:
                return find_innermost_match(l[0], depth + 1)
            else:
                return depth
        depth = find_innermost_match(inline)

        def convert_to_tuples(l, data_depth, depth=0):
            if data_depth == depth:
                return tuple(l)
            else:
                return [convert_to_tuples(x, data_depth, depth+1) for x in l]
        inline = convert_to_tuples(inline, depth)

        return np.asarray(inline, dtype=dtype)
    else:
        def handle_mask(inline):
            if isinstance(inline, list):
                if None in inline:
                    inline_array = np.asarray(inline)
                    nones = np.equal(inline_array, None)
                    return np.ma.array(np.where(nones, 0, inline),
                                       mask=nones)
                else:
                    return [handle_mask(x) for x in inline]
            return inline
        inline = handle_mask(inline)

        inline = np.ma.asarray(inline, dtype=dtype)
        if not ma.is_masked(inline):
            return inline.data
        else:
            return inline
Example #26
def wet_spell_analysis(reference_array, threshold=0.1, nyear=1, dt=3.):
    ''' Characterize wet spells using sub-daily (hourly) data

    :param reference_array: an array to be analyzed
    :type reference_array: :class:'numpy.ma.core.MaskedArray'

    :param threshold: the minimum amount of rainfall [mm/hour] 
    :type threshold: 'float'

    :param nyear: the number of discontinuous periods
    :type nyear: 'int'

    :param dt: the temporal resolution of reference_array
    :type dt: 'float'
    '''
    nt = reference_array.shape[0]
    if reference_array.ndim == 3:
        reshaped_array = reference_array.reshape([nt, reference_array.size // nt])
    else:
        reshaped_array = reference_array
    if ma.count_masked(reshaped_array[0,:]) != 0:
        xy_indices = numpy.where(reshaped_array.mask[0, :] == False)[0]
    else:
        xy_indices = numpy.arange(reshaped_array.shape[1])

    nt_each_year = nt // nyear  # integer length so it can be used for slicing
    spell_duration = []
    peak_rainfall = []
    total_rainfall = []

    for index in xy_indices:
        for iyear in numpy.arange(nyear):
            data0_temp = reshaped_array[nt_each_year * iyear:nt_each_year * (iyear + 1),
                                        index]
            # time indices when precipitation rate is smaller than the
            # threshold [mm/hr]
            t_index = numpy.where((data0_temp <= threshold) &
                               (data0_temp.mask == False))[0]
            t_index = numpy.insert(t_index, 0, 0)
            t_index = t_index + nt_each_year * iyear
            for it in numpy.arange(t_index.size - 1):
                if t_index[it + 1] - t_index[it] > 1:
                    data1_temp = data0_temp[t_index[it] + 1:t_index[it + 1]]
                    if not ma.is_masked(data1_temp):
                        spell_duration.append(
                            (t_index[it + 1] - t_index[it] - 1) * dt)
                        peak_rainfall.append(data1_temp.max())
                        total_rainfall.append(data1_temp.sum())
    return numpy.array(spell_duration), numpy.array(peak_rainfall), numpy.array(total_rainfall)
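A hedged usage sketch on a single grid point: 12 three-hourly time steps containing two wet spells (one of two steps, one of a single step).

import numpy as np
import numpy.ma as ma

rain = ma.masked_array(
    np.array([[0, 0, 5, 7, 0, 0, 2, 0, 0, 0, 0, 0]], dtype=float).T,
    mask=False)
dur, peak, tot = wet_spell_analysis(rain, threshold=0.1, nyear=1, dt=3.)
print(dur, peak, tot)    # approximately [6. 3.] [7. 2.] [12. 2.]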
Example #27
def _percentile(data, axis, percent, **kwargs):
    # NB. scipy.stats.mstats.scoreatpercentile always works across just the
    # first dimension of its input data, and  returns a result that has one
    # fewer dimension than the input.
    # So shape=(3, 4, 5) -> shape(4, 5)
    data = np.rollaxis(data, axis)
    shape = data.shape[1:]
    if shape:
        data = data.reshape([data.shape[0], np.prod(shape)])
    result = scipy.stats.mstats.scoreatpercentile(data, percent, **kwargs)
    if not ma.isMaskedArray(data) and not ma.is_masked(result):
        result = np.asarray(result)
    if shape:
        result = result.reshape(shape)
    return result
Example #28
 def test_set_window(self):
     window_data = np.ones( (self.lat_size_win, self.lon_size_win) )
     x = self.w.set_window(window_data)
     
     # check output geometry
     self.assertEqual(x.shape[0], 360)
     self.assertEqual(x.shape[1], 720)
     
     # check output is masked
     self.assertTrue(ma.is_masked(x))
     
     # check that the window is only thing in returned array
     win_masked = ma.count_masked(x)
     win = ma.count(x)
     self.assertEqual(win, window_data.size)
     self.assertEqual(win_masked, x.size - window_data.size)
     self.assertTrue(np.all(x[self.w._window] == window_data))
Example #29
 def __setitem__(self,key,value):
     """
         Change the value of a tile based on a key (which is the row/col)
     """
     if isinstance(key,list):
         key = tuple(map( np.concatenate ,zip(*key)))
         
     if(ma.is_masked(value)):
         (r_loc,c_loc) = key
         r_min = np.min(r_loc)
         c_min = np.min(c_loc)
         key_offset = (r_loc-r_min,c_loc-c_min)            
         
         self._image[key] = value[key_offset]
     elif np.isscalar(value):
         self._image[key] = value
     else:
         self._image[key] = np.reshape(value,(-1,self._image.shape[2]))
Example #30
    def feature_from_tile(self,tile,out):
        fs = tile.shape[-1]

        out = out.view();
        out.shape = (self._number_of_locations,fs)
        
        def build_tree_vector(points_r,points_c,levels_left,local_out_array):
            
            tile_rs = tile[points_r,points_c].reshape( -1,fs);
            local_out_array[0,:] = ma.mean(tile_rs,axis=0)
            
                #plt.plot(points_r,points_c,'o')
            if levels_left > 1:
                remaining_out_array = local_out_array[1:,:]
                mean_r = np.mean(points_r);
                mean_c = np.mean(points_c)
                
                offset_size = remaining_out_array.shape[0] // 4  # integer so it can be used in slices
        
                top = points_r < mean_r
                bottom = np.logical_not(top)
                left = points_c < mean_c
                right = np.logical_not(left)
                
                quadrents = [ (top,right),(top,left),(bottom,left),(bottom,right)  ]
                
                #Fill the solution for all 4 quadrants
                for idx,quadrent in enumerate(quadrents):
                    q = np.logical_and(quadrent[0],quadrent[1])
                    q_out = remaining_out_array[ idx*offset_size : (idx+1)*offset_size, : ]
                    build_tree_vector(points_r[q],points_c[q],levels_left - 1,q_out)
                #renormalize
                remaining_out_array *= .25
                
                
        if ma.is_masked(tile):
            points_r,points_c = np.nonzero(np.logical_not(tile.mask[:,:,0]))
        else:
            grid = np.mgrid[0:tile.shape[0],
                            0:tile.shape[1]]
            points_r = grid[0,:,:].ravel()
            points_c = grid[1,:,:].ravel()
          
        build_tree_vector(points_r,points_c,self._number_of_levels,out)
Example #31
    def calculate_joint_estimate(self):
        # do not use SSNV based estimate if it exceeds 0.3 (this estimate can be unreliable at high TiNs due to
        # germline events)
        if self.ssnv_based_model.TiN <= 0.3 and ~np.isnan(
                self.ascna_based_model.TiN):
            if len(self.ascna_based_model.centroids) > 1:
                reselect_cluster = np.argmin(
                    np.abs(self.ascna_based_model.centroids / 100 -
                           self.ssnv_based_model.TiN))
                self.ascna_based_model.TiN_likelihood = self.ascna_based_model.cluster_TiN_likelihoods[
                    reselect_cluster]
                print('reselected cluster based on SSNVs')
            # combine independent likelihoods
            self.joint_log_likelihood = self.ascna_based_model.TiN_likelihood + self.ssnv_based_model.TiN_likelihood
            # normalize likelihood to calculate posterior
            self.joint_posterior = np.exp(
                self.ascna_based_model.TiN_likelihood +
                self.ssnv_based_model.TiN_likelihood -
                np.nanmax(self.ascna_based_model.TiN_likelihood +
                          self.ssnv_based_model.TiN_likelihood))
            self.joint_posterior = np.true_divide(
                self.joint_posterior, np.nansum(self.joint_posterior))
            self.CI_tin_low = self.TiN_range[next(x[0] for x in enumerate(
                np.cumsum(
                    np.ma.masked_array(
                        np.true_divide(self.joint_posterior,
                                       np.nansum(self.joint_posterior)))))
                                                  if x[1] > 0.025)]
            self.CI_tin_high = self.TiN_range[next(x[0] for x in enumerate(
                np.cumsum(
                    np.ma.masked_array(
                        np.true_divide(self.joint_posterior,
                                       np.nansum(self.joint_posterior)))))
                                                   if x[1] > 0.975)]

            self.TiN_int = np.nanargmax(self.joint_posterior)
            self.TiN = self.TiN_range[self.TiN_int]

            zero_tin_ssnv_model = copy.deepcopy(self.ssnv_based_model)
            zero_tin_ssnv_model.TiN = 0
            zero_tin_ssnv_model.expectation_of_z_given_TiN()
            zero_tin_ssnv_model.maximize_TiN_likelihood()
            zero_total_l = zero_tin_ssnv_model.TiN_likelihood + self.ascna_based_model.TiN_likelihood
            zero_total_l = np.exp(zero_total_l - np.nanmax(zero_total_l))
            self.p_null = np.true_divide(zero_total_l,
                                         np.nansum(zero_total_l))[0]
            print('joint TiN estimate = ' + str(self.TiN))
        # use only the aSCNA based model (SSNV estimate unreliable above 0.3)
        elif ~np.isnan(self.ascna_based_model.TiN):
            # otherwise TiN estimate is = to aSCNA estimate
            print(
                'SSNV based TiN estimate exceeds 0.3; using only the aSCNA based estimate'
            )
            self.joint_log_likelihood = self.ascna_based_model.TiN_likelihood
            self.joint_posterior = np.exp(
                self.ascna_based_model.TiN_likelihood -
                np.nanmax(self.ascna_based_model.TiN_likelihood))
            self.joint_posterior = np.true_divide(
                self.joint_posterior, np.nansum(self.joint_posterior))
            self.CI_tin_low = self.TiN_range[next(x[0] for x in enumerate(
                np.cumsum(
                    np.ma.masked_array(
                        np.true_divide(self.joint_posterior,
                                       np.nansum(self.joint_posterior)))))
                                                  if x[1] > 0.025)]
            self.CI_tin_high = self.TiN_range[next(x[0] for x in enumerate(
                np.cumsum(
                    np.ma.masked_array(
                        np.true_divide(self.joint_posterior,
                                       np.nansum(self.joint_posterior)))))
                                                   if x[1] > 0.975)]
            self.TiN_int = np.nanargmax(self.joint_posterior)
            self.TiN = self.TiN_range[self.TiN_int]
            self.p_null = self.joint_posterior[0]
        # use only the SSNV based estimate (no usable aSCNA estimate)
        elif ~np.isnan(self.ssnv_based_model.TiN
                       ) and self.ssnv_based_model.TiN <= 0.3:
            print('No aSCNAs only using SSNV based model')
            self.joint_log_likelihood = self.ssnv_based_model.TiN_likelihood
            self.joint_posterior = np.exp(
                self.ssnv_based_model.TiN_likelihood -
                np.nanmax(self.ssnv_based_model.TiN_likelihood))
            self.joint_posterior = np.true_divide(
                self.joint_posterior, np.nansum(self.joint_posterior))
            self.CI_tin_low = self.TiN_range[next(x[0] for x in enumerate(
                np.cumsum(
                    np.ma.masked_array(
                        np.true_divide(self.joint_posterior,
                                       np.nansum(self.joint_posterior)))))
                                                  if x[1] > 0.025)]
            self.CI_tin_high = self.TiN_range[next(x[0] for x in enumerate(
                np.cumsum(
                    np.ma.masked_array(
                        np.true_divide(self.joint_posterior,
                                       np.nansum(self.joint_posterior)))))
                                                   if x[1] > 0.975)]
            self.TiN_int = np.nanargmax(self.joint_posterior)
            self.TiN = self.TiN_range[self.TiN_int]
            zero_tin_ssnv_model = copy.deepcopy(self.ssnv_based_model)
            zero_tin_ssnv_model.TiN = 0
            zero_tin_ssnv_model.expectation_of_z_given_TiN()
            zero_tin_ssnv_model.maximize_TiN_likelihood()
            zero_total_l = zero_tin_ssnv_model.TiN_likelihood
            zero_total_l = np.exp(zero_total_l - np.nanmax(zero_total_l))
            self.p_null = np.true_divide(zero_total_l,
                                         np.nansum(zero_total_l))[0]
        else:
            print('insufficient data to generate TiN estimate.')
            self.CI_tin_high = 0
            self.CI_tin_low = 0
            self.joint_posterior = np.zeros([self.input.resolution, 1])
            self.joint_posterior[0] = 1
            self.TiN_int = 0
            self.TiN = 0
            self.p_null = 1
        pH1 = self.joint_posterior[self.TiN_int]
        #print(self.joint_posterior)
        #print(self.p_null)
        # code to deal with underflows
        if ma.is_masked(self.p_null):
            self.p_null = 0
        pH0 = self.p_null

        p_model = np.true_divide(self.input.TiN_prior * pH1,
                                 (self.input.TiN_prior * pH1) +
                                 ((1 - self.input.TiN_prior) * pH0))
        if p_model < 0.5 or ~np.isfinite(p_model):
            print('insufficient evidence to justify TiN > 0')
            self.joint_posterior = np.zeros([self.input.resolution, 1])
            self.joint_posterior[0] = 1
            self.TiN_int = 0
            self.TiN = 0
            self.CI_tin_high = 0
            self.CI_tin_low = 0
Example #32
def plot_rgb(
    arr,
    rgb=(0, 1, 2),
    figsize=(10, 10),
    str_clip=2,
    ax=None,
    extent=None,
    title="",
    stretch=None,
):
    """Plot three bands in a numpy array as a composite RGB image.

    Parameters
    ----------
    arr : numpy array
        An n-dimensional array in rasterio band order (bands, rows, columns)
        containing the layers to plot.
    rgb : list (default = (0, 1, 2))
        Indices of the three bands to be plotted.
    figsize : tuple (default = (10, 10))
        The x and y integer dimensions of the output plot.
    str_clip: int (default = 2)
        The percentage of clip to apply to the stretch. Default = 2 (2 and 98).
    ax : object (optional)
        The axes object where the ax element should be plotted.
    extent : tuple (optional)
        The extent object that matplotlib expects (left, right, bottom, top).
    title : string (optional)
        The intended title of the plot.
    stretch : Boolean (optional)
        Application of a linear stretch. If set to True, a linear stretch will
        be applied.

    Returns
    ----------
    ax : axes object
        The axes object associated with the 3 band image.

    Example
    -------

    .. plot::

        >>> import matplotlib.pyplot as plt
        >>> import rasterio as rio
        >>> import earthpy.plot as ep
        >>> from earthpy.io import path_to_example
        >>> with rio.open(path_to_example('rmnp-rgb.tif')) as src:
        ...     img_array = src.read()
        >>> # Ensure the input array doesn't have nodata values like -9999
        >>> ep.plot_rgb(img_array)
        <matplotlib.axes._subplots.AxesSubplot object at 0x...

    """

    if len(arr.shape) != 3:
        raise ValueError("Input needs to be 3 dimensions and in rasterio "
                         "order with bands first")

    # Index bands for plotting and clean up data for matplotlib
    rgb_bands = arr[rgb, :, :]

    if stretch:
        rgb_bands = _stretch_im(rgb_bands, str_clip)

    # If type is masked array - add alpha channel for plotting
    if ma.is_masked(rgb_bands):
        # Build alpha channel
        mask = ~(np.ma.getmask(rgb_bands[0])) * 255

        # Add the mask to the array & swap the axes order from (bands,
        # rows, columns) to (rows, columns, bands) for plotting
        rgb_bands = np.vstack((es.bytescale(rgb_bands),
                               np.expand_dims(mask,
                                              axis=0))).transpose([1, 2, 0])
    else:
        # Index bands for plotting and clean up data for matplotlib
        rgb_bands = es.bytescale(rgb_bands).transpose([1, 2, 0])

    # Then plot. Define ax if it's undefined
    show = False
    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)
        show = True

    ax.imshow(rgb_bands, extent=extent)
    ax.set_title(title)
    ax.set(xticks=[], yticks=[])

    # Multipanel won't work if plt.show is called prior to second plot def
    if show:
        plt.show()
    return ax
Example #33
    def create_var_from_data(self,
                             var_name,
                             data,
                             dims,
                             datatype=DATATYPE_AUXILIARIES,
                             attributes=None,
                             replace_dims=REPLACE_DIMS):
        """
        Create a new variable in the netcdf file starting from a numpy array
        and some metadata (like the name of the dimensions and the attributes).

        The dimensions that do not already exist (in the root group) will be
        created.

        If a dimension is unlimited, it will be saved as a fixed-length
        dimension and the unlimited attribute will be lost.

        :param var_name: A string that will be used as the name of the new var
        :param data: A numpy array with the data that must be saved into the
        variable
        :param dims: A list of couples (tuples with two element). The first
        element of the tuple is the name of the dimension and the second one
        is an integer with its length. The order of the list must be such that
        [i[1] for i in dimensions] is the shape of data
        :param datatype: A string that represent the type of data of the
        variable. For example, "f8" means double precision
        :param attributes: A dictionary-like item
        :param replace_dims: A dictionary. If the name of a dimension is inside
        this dictionary, it will be replaced with its corresponding one. This is
        useful because some dimensions in RegCM are saved with another name in
        the CORDEX files (for example, jx becomes x).
        """
        LOGGER.debug('Saving variable %s', var_name)

        if replace_dims is None:
            replace_dims = {}
        if attributes is None:
            attributes = {}

        # Copy dims
        dim_name_list = []
        for dim_name, dim_len in dims:
            LOGGER.debug(
                'A dimension named "%s" is required to create the variable %s',
                dim_name, var_name)
            if dim_name in replace_dims:
                LOGGER.debug(
                    'The name "%s" for a dimension is in the replace dict. It '
                    'will be called "%s" instead',
                    dim_name,
                    replace_dims[dim_name],
                )
                dim_name = replace_dims[dim_name]

            dim_name_list.append(dim_name)

            if dim_name in self.dimensions:
                LOGGER.debug(
                    'Dimension "%s" will not be created because it has already '
                    'been created on the file', dim_name)
                dim_current_len = len(self.dimensions[dim_name])
                if dim_current_len != dim_len:
                    raise ValueError(
                        'The length of dimension {} is already set to {}. '
                        'To save variable "{}", it should be {}!'.format(
                            dim_name, dim_len, var_name, dim_current_len))
            else:
                LOGGER.debug('Creating dimension %s of length %s', dim_name,
                             dim_len)
                if dim_name == 'time':
                    self.createDimension(dim_name, None)
                else:
                    self.createDimension(dim_name, dim_len)

        # Finding the fill value
        if '_FillValue' in attributes:
            fill_value = attributes['_FillValue']
            LOGGER.debug('Using %s as fill value (as requested)', fill_value)
        else:
            if is_masked(data):
                if datatype in NETCDF_DEFAULT_FILL_VALUES:
                    fill_value = NETCDF_DEFAULT_FILL_VALUES[datatype]
                    LOGGER.debug('Using %s as fill value (default value)',
                                 fill_value)
                else:
                    raise ValueError(
                        'Data is masked but no appropriate fill_value has '
                        'been found for datatype %s' % datatype)
            else:
                LOGGER.debug(
                    'No _FillValue specified and data is unmasked. Setting '
                    'fill_value flag as False')
                fill_value = False

        # Create the variable
        LOGGER.debug(
            'Creating variable %s, with datatype "%s" and dimensions %s',
            var_name, datatype, tuple(dim_name_list))
        ncdf_variable = self.createVariable(var_name,
                                            datatype,
                                            tuple(dim_name_list),
                                            fill_value=fill_value,
                                            zlib=COMPRESSION,
                                            complevel=COMPRESSION_LEVEL,
                                            shuffle=SHUFFLE,
                                            fletcher32=FLETCHER32)

        # Copy attributes
        for attr, attr_val in attributes.items():
            if attr == '_FillValue' or attr == 'missing_value':
                ncdf_variable.setncattr('missing_value', attr_val)
                continue

            if attr in EXCLUDED_ATTRIBUTES:
                LOGGER.debug(
                    'Not copying attribute %s because it is in the '
                    'EXCLUDED_ATTRIBUTES list (file globals.py)', attr)
                continue

            LOGGER.debug(
                'Adding attribute "%s" with value "%s" for variable "%s"',
                attr,
                attr_val,
                var_name,
            )
            ncdf_variable.setncattr(attr, attr_val)

        if is_masked(data):
            LOGGER.debug('Data is masked')
        else:
            LOGGER.debug('Data is not masked')

        # Copy the values into the variable
        LOGGER.debug('Copying data inside the variable')
        ncdf_variable[:] = data

        return ncdf_variable
Example #34
trackStartYear = years[trackStartDate]
trackStartDate = Dates[trackStartDate]

print(len(trackLen))
#for x in range(0,np.int(0.01*len(trackLen))):
for x in range(0, len(trackLen)):
    print("On point %d of %d" % (x, len(trackLen)))
    if x % 10000 == 0:
        print("On track {0}/{1}".format(x, len(trackLen)))
    if trackLen[x] < ntracks_min:  # checking to make sure TPV track was longer than two days
        continue

    lat = data.variables['latExtr'][x, :]
    lon = data.variables['lonExtr'][x, :]
    if not ma.is_masked(lat):
        per_life_in_polar = float(np.where(lat <= -60)[0].shape[0]) / float(
            lat.shape[0])  # checking if TPV spent 60% of lifetime in Antarctic
    else:
        per_life_in_polar = float(
            np.where((lat.data <= -60)
                     & (lat.mask != True))[0].shape[0]) / float(
                         np.where((lat.mask != True))[0].shape[0])
    if per_life_in_polar < 0.6:
        istpv = False
    else:
        istpv = True

    if (make_tpv == True):
        if istpv == True:
            perc = 0.0
Example #35
            fsm.magtoflux(fm.SPITZER, fm.IRAC3, i['5.8mag_Get']).value *
            (i['e_5.8mag_Get'] if i['e_5.8mag_Get'] else 0.01),
            fsm.magtoflux(fm.SPITZER, fm.IRAC4, i['8.0mag_Get']).value,
            fsm.magtoflux(fm.SPITZER, fm.IRAC4, i['8.0mag_Get']).value *
            (i['e_8.0mag_Get'] if i['e_8.0mag_Get'] else 0.01))
        k = 6
    else:
        k = -1
        use_filts = [1, 1, 1, 1, 1, 1, 1, 1]
        nfilts = 3 + 5

    if allfilts:
        #fm.GAIA
        k += 1
        val = fsm.magtoflux(fm.GAIA, fm.GAIA_G, i['phot_g_mean_mag_Ga']).value
        if ma.is_masked(val):
            photline += " -999  -999 "
            use_filts[k] = 0
        else:
            photline += " %5.4e  %5.4e " % (val, val *
                                            i['phot_g_mean_flux_error_Ga'] /
                                            i['phot_g_mean_flux_Ga'])
            use_filts[k] = 1
        k += 1
        val = fsm.magtoflux(fm.GAIA, fm.GAIA_B, i['phot_bp_mean_mag_Ga']).value
        if ma.is_masked(val):
            photline += " -999  -999 "
            use_filts[k] = 0
        else:
            photline += " %5.4e  %5.4e " % (val, val *
                                            i['phot_bp_mean_flux_error_Ga'] /
Example #36
def build_dimension_coordinate(engine,
                               cf_coord_var,
                               coord_name=None,
                               coord_system=None):
    """Create a dimension coordinate (DimCoord) and add it to the cube."""

    cf_var = engine.cf_var
    cube = engine.cube
    attributes = {}

    attr_units = get_attr_units(cf_coord_var, attributes)
    points_data = cf_coord_var[:]
    # Gracefully fill points masked array.
    if ma.is_masked(points_data):
        points_data = ma.filled(points_data)
        msg = "Gracefully filling {!r} dimension coordinate masked points"
        warnings.warn(msg.format(str(cf_coord_var.cf_name)))

    # Get any coordinate bounds.
    cf_bounds_var, climatological = get_cf_bounds_var(cf_coord_var)
    if cf_bounds_var is not None:
        bounds_data = cf_bounds_var[:]
        # Gracefully fill bounds masked array.
        if ma.is_masked(bounds_data):
            bounds_data = ma.filled(bounds_data)
            msg = "Gracefully filling {!r} dimension coordinate masked bounds"
            warnings.warn(msg.format(str(cf_coord_var.cf_name)))
        # Handle transposed bounds where the vertex dimension is not
        # the last one. Test based on shape to support different
        # dimension names.
        if cf_bounds_var.shape[:-1] != cf_coord_var.shape:
            bounds_data = reorder_bounds_data(bounds_data, cf_bounds_var,
                                              cf_coord_var)
    else:
        bounds_data = None

    # Determine whether the coordinate is circular.
    circular = False
    if (points_data.ndim == 1 and coord_name
            in [CF_VALUE_STD_NAME_LON, CF_VALUE_STD_NAME_GRID_LON]
            and cf_units.Unit(attr_units)
            in [cf_units.Unit("radians"),
                cf_units.Unit("degrees")]):
        modulus_value = cf_units.Unit(attr_units).modulus
        circular = iris.util._is_circular(points_data,
                                          modulus_value,
                                          bounds=bounds_data)

    # Determine the name of the dimension/s shared between the CF-netCDF data variable
    # and the coordinate being built.
    common_dims = [
        dim for dim in cf_coord_var.dimensions if dim in cf_var.dimensions
    ]
    data_dims = None
    if common_dims:
        # Calculate the offset of each common dimension.
        data_dims = [cf_var.dimensions.index(dim) for dim in common_dims]

    # Determine the standard_name, long_name and var_name
    standard_name, long_name, var_name = get_names(cf_coord_var, coord_name,
                                                   attributes)

    # Create the coordinate.
    try:
        coord = iris.coords.DimCoord(
            points_data,
            standard_name=standard_name,
            long_name=long_name,
            var_name=var_name,
            units=attr_units,
            bounds=bounds_data,
            attributes=attributes,
            coord_system=coord_system,
            circular=circular,
            climatological=climatological,
        )
    except ValueError as e_msg:
        # Attempt graceful loading.
        coord = iris.coords.AuxCoord(
            points_data,
            standard_name=standard_name,
            long_name=long_name,
            var_name=var_name,
            units=attr_units,
            bounds=bounds_data,
            attributes=attributes,
            coord_system=coord_system,
            climatological=climatological,
        )
        cube.add_aux_coord(coord, data_dims)
        msg = ("Failed to create {name!r} dimension coordinate: {error}\n"
               "Gracefully creating {name!r} auxiliary coordinate instead.")
        warnings.warn(msg.format(name=str(cf_coord_var.cf_name), error=e_msg))
    else:
        # Add the dimension coordinate to the cube.
        if data_dims:
            cube.add_dim_coord(coord, data_dims)
        else:
            # Scalar coords are placed in the aux_coords container.
            cube.add_aux_coord(coord, data_dims)

    # Update the coordinate to CF-netCDF variable mapping.
    engine.cube_parts["coordinates"].append((coord, cf_coord_var.cf_name))
Esempio n. 37
0
from obspy.core import read, Stream, Trace
from glob import iglob
import numpy as np
from numpy.ma import is_masked

folder =\
'/Users/lion/Documents/workspace/TestFiles/archive/RJOB/EHE.D/output/*_index.mseed'

st = read(folder)
#XXX: This fix is just for wrong index files. Remove the next time around.
for trace in st:
    trace.stats.sampling_rate = 1000.0 / (24 * 60 * 60)

st.merge()

# Set masked arrays to zero.
if is_masked(st[0].data):
    st[0].data.fill_value = 0.0
    st[0].data = st[0].data.filled()

st.write('BW.RJOB..EHE.2009.index', format='MSEED')
Esempio n. 38
0
def plot_rgb(arr,
             rgb=(0, 1, 2),
             ax=None,
             extent=None,
             title="",
             figsize=(10, 10),
             stretch=None,
             str_clip=2):
    """Plot three bands in a numpy array as a composite RGB image.

    Parameters
    ----------
    arr: numpy array
        A 3-dimensional numpy array in rasterio band order (bands, rows, columns)
    rgb: list
        Indices of the three bands to be plotted (default = 0,1,2)
    extent: tuple
        The extent object that matplotlib expects (left, right, bottom, top)
    title: string (optional)
        String representing the title of the plot
    ax: object
        The axes object where the ax element should be plotted. Default = none
    figsize: tuple (optional)
        The x and y integer dimensions of the output plot if preferred to set.
    stretch: Boolean
        If True a linear stretch will be applied
    str_clip: int (optional)
        The % of clip to apply to the stretch. Default = 2 (2 and 98)

    Returns
    -------
    fig, ax : figure object, axes object
        The figure and axes objects associated with the 3-band image. If the
        ax keyword is specified, the returned figure will be None.
    """

    if len(arr.shape) != 3:
        raise Exception("""Input needs to be 3 dimensions and in rasterio
                           order with bands first""")

    # Index bands for plotting and clean up data for matplotlib
    rgb_bands = arr[rgb]

    if stretch:
        s_min = str_clip
        s_max = 100 - str_clip
        arr_rescaled = np.zeros_like(rgb_bands)
        for ii, band in enumerate(rgb_bands):
            lower, upper = np.percentile(band, (s_min, s_max))
            arr_rescaled[ii] = exposure.rescale_intensity(band,
                                                          in_range=(lower,
                                                                    upper))
        rgb_bands = arr_rescaled.copy()

    # If type is masked array - add alpha channel for plotting
    if ma.is_masked(rgb_bands):
        # Build alpha channel
        mask = ~(np.ma.getmask(rgb_bands[0])) * 255

        # Add the mask to the array & swap the axes order from (bands,
        # rows, columns) to (rows, columns, bands) for plotting
        rgb_bands = np.vstack((bytescale(rgb_bands),
                               np.expand_dims(mask, axis=0))).\
            transpose([1, 2, 0])
    else:
        # Index bands for plotting and clean up data for matplotlib
        rgb_bands = bytescale(rgb_bands).transpose([1, 2, 0])

    # Then plot. Define ax if it's default to none
    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)
    else:
        fig = None
    ax.imshow(rgb_bands, extent=extent)
    ax.set_title(title)
    ax.set(xticks=[], yticks=[])
    return fig, ax
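
A stand-alone sketch of the masked-array branch above, using synthetic data and plain numpy only; the crude 0-255 scaling stands in for the bytescale helper the snippet assumes:

import numpy as np
import numpy.ma as ma

bands = ma.masked_less(np.random.rand(3, 4, 5), 0.1)   # (bands, rows, cols), some cells masked

scaled = (ma.filled(bands, 0) * 255).astype(np.uint8)  # crude stand-in for bytescale()
alpha = (~ma.getmaskarray(bands)[0] * 255).astype(np.uint8)

rgba = np.vstack((scaled, alpha[np.newaxis, ...])).transpose([1, 2, 0])
print(rgba.shape)  # (4, 5, 4): rows, cols, RGBA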
Esempio n. 39
0
    def make_page(self):
        success = True
        # check all models.tab files and existence of all therein
        t = ModelSet.all_sets()
        failed = list()
        for n, z, md, m in zip(list(t["name"]), list(t["z"]),
                               list(t["medium"]), list(t["mass"])):
            print(n, z, md, m)
            mdict = dict()
            ms = ModelSet(name=n, z=z, medium=md, mass=m)
            mp = ModelPlot(ms)
            # stop complaining about too many figures
            mp._plt.rcParams.update({'figure.max_open_warning': 0})
            print(f'Making page for {n,z,md,m}')
            if m is None or ma.is_masked(m):
                dir = f'{n}_{z}_{md}'
            else:
                dir = f'{n}_{z}_{md}_{m}'
            dir = dir.replace(' ', '_')
            os.mkdir(f'/tmp/mpound/{dir}')
            index = open(f'/tmp/mpound/{dir}/index.html', 'w')
            index.write(
                f'<html><head> <meta charset="utf-8">\n <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">\n <meta name="description" content="Tools to analyze observations of photodissociation regions">\n <meta name="author" content="Marc W. Pound">\n <title>PhotoDissociation Region Toolbox {dir}</title>\n <!-- Font Awesome icons (free version)-->\n <script src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.13.0/js/all.min.js" crossorigin="anonymous"></script>\n <!-- Font Awesome accessibility options -->\n <script src="https://use.fontawesome.com/824d9b17ca.js"></script>\n <link href="http://dustem.astro.umd.edu/freelancer/css/styles.css" rel="stylesheet">\n <!-- from https://startbootstrap.com/themes/freelancer/-->\n <link rel="stylesheet" href="http://dustem.astro.umd.edu/freelancer/css/heading.css">\n <link rel="stylesheet" href="http://dustem.astro.umd.edu/freelancer/css/body.css">\n \n <!-- PDRT specific CSS -->\n <link href="http://dustem.astro.umd.edu/css/pdrt.css" rel="stylesheet">\n </head><body><br>'
            )
            index.write(
                '<table class="table mytable table-striped table-striped table-bordered" bgcolor="white" >\n<tr>'
            )

            i = 0
            numcols = 4
            for r in ms.table["ratio"]:
                if i != 0 and i % numcols == 0:
                    index.write("</tr>\n<tr>")
                try:
                    model = ms.get_model(r)
                    modelfile = ms.table.loc[r]["filename"]
                    if "/" in model._title:
                        model._title += " Intensity Ratio"
                    else:
                        if "FIR" not in model._title and "Surface" not in model._title and "A_V" not in model._title:
                            model._title += " Intensity"
                    model._title = model._title.replace(
                        "$\mu$",
                        "&micro;").replace("$_{FIR}$",
                                           "<sub>FIR</sub>").replace(
                                               "$_2$", "<sub>2</sub>").replace(
                                                   "$A_V$", "A<sub>V</sub>")
                    #.replace("$T_S$","T<sub>S</sub>")
                    #.replace("$^{13}$","<sup>13</sup>")
                    #print(f"doing {r} = {modelfile}.png title={model._title}")
                    if "$" in model._title:
                        print(
                            f"############ OOPS missed some latex {model._title}"
                        )
                    fig_out = f'{dir}/{modelfile}.png'
                    fig_html = f'{dir}/{modelfile}.html'
                    f_html = f'{modelfile}.html'
                    index.write(
                        f'<td><a href="{f_html}">{model._title}</a></td>')

                    mdict[r] = fig_html
                    i = i + 1
                    if False:
                        if model.header["CTYPE1"] == "T_e":
                            # Iron line ratios are function of electron temperature and electron density
                            # not H2 density and radiation field.
                            mp.plot(r, label=True, norm="log", cmap='plasma')
                        else:
                            mp.plot(r,
                                    yaxis_unit="Habing",
                                    label=True,
                                    norm="log",
                                    cmap='plasma')
                        mp.savefig(f'/tmp/mpound/{fig_out}')
                        # This is supposed to stop complaints about
                        # too many figures, but actually does not!
                        mp._plt.close(mp.figure)
                except Exception as e:
                    success = False
                    failed.append(f'{r} {modelfile} : {str(e)}\n')
            if not success:
                print("Couldn't open these models:", failed)
            index.write('</tr></table></body></html>')
            index.close()
Esempio n. 40
0
def unwrapv(inph, in_p=(), uv=2 * pi):
    """Return the input matrix unwrapped the value given in uv

    This is a vectorized routine, but is not as fast as it should
    """

    if not is_masked(inph):
        fasei = MaskedArray(inph, isnan(inph))
    else:
        fasei = inph.copy()

    size = fasei.shape
    nx, ny = size
    # If the initial unwraping point is not given, take the center of the image
    # as initial coordinate
    if in_p == ():
        in_p = (int(size[0] / 2), int(size[1] / 2))

    # Create a temporal space to mark if the points are already unwrapped
    # 0 the point has not been unwrapped
    # 1 the point has not been unwrapped, but it is in the unwrapping list
    # 2 the point was already unwrapped

    fl = N.zeros(size)

    # List containing the points to unwrap
    l_un = [in_p]
    fl[in_p] = 1

    # unwrapped values
    faseo = fasei.copy()
    XI_, YI_ = meshgrid(range(-1, 2), range(-1, 2))
    XI_ = XI_.flatten()
    YI_ = YI_.flatten()
    while len(l_un) > 0:
        # remove the first value from the list
        unp = l_un.pop(0)
        #l_un[0:1]=[]
        XI = XI_ + unp[0]
        YI = YI_ + unp[1]
        #Remove from the list the values where XI is negative
        nxi = XI > -1
        nyi = YI > -1
        nxf = XI < nx
        nyf = YI < ny
        n = nonzero(nxi & nyi & nxf & nyf)
        lco = zip(XI[n], YI[n])

        # Put the coordinates of unwrapped the neigbors in the list

        # And check for wrapping
        nv = 0
        wv = 0

        for co in lco:
            if (fl[co] == 0) & (faseo.mask[co] == False):
                fl[co] = 1
                l_un.append(co)
            elif fl[co] == 2:
                wv = wv + rint((faseo[co] - faseo[unp]) / uv)
                nv = nv + 1

        if nv != 0:
            wv = wv / nv
            #if wv>=0: wv=int(wv+0.5)
            #else: wv=int(wv-0.5)
        fl[unp] = 2
        faseo[unp] = faseo[unp] + wv * uv

    return faseo
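
A hedged usage sketch for unwrapv on a synthetic wrapped ramp; it assumes the names the snippet relies on (numpy as N, pi, isnan, meshgrid, nonzero, rint, MaskedArray, is_masked) are imported as shown:

import numpy as N
from numpy import pi, isnan, meshgrid, nonzero, rint
from numpy.ma import MaskedArray, is_masked

yy, xx = N.mgrid[0:32, 0:32]
true_phase = 0.3 * xx + 0.1 * yy      # smooth ramp, gradient well below pi per pixel
wrapped = true_phase % (2 * pi)

unwrapped = unwrapv(wrapped)          # unwraps outward from the image centre
resid = unwrapped - true_phase
print(float(resid.max() - resid.min()))  # essentially zero: only a constant 2*pi offset remains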
Esempio n. 41
0
    def min_max_clip_version_5(cls, file_data: ndarray,
                               number_dropped_values: int, console: Console,
                               session_controller: SessionController):
        console.push_level()
        console.message(
            f"Using min-max clip with {number_dropped_values} iterations", +1)
        masked_array = ma.MaskedArray(file_data)
        drop_counter = 1
        while drop_counter <= number_dropped_values:
            cls.check_cancellation(session_controller)
            console.push_level()
            console.message(
                f"Iteration {drop_counter} of {number_dropped_values}.", +1)
            drop_counter += 1
            # Find the minimums in all columns.  This will give a 2d matrix the same size as the images
            # with the column-minimum in each position
            minimum_values = masked_array.min(axis=0)
            cls.check_cancellation(session_controller)

            # Now compare that matrix of minimums down the layers, so we get Trues where
            # each minimum exists in its column (minimums might exist more than once, and
            # we want to find all of them)
            masked_array = ma.masked_where(masked_array == minimum_values,
                                           masked_array)
            cls.check_cancellation(session_controller)
            console.message("Masked minimums.", +1, temp=True)

            # Now find and mask the maximums, same approach
            maximum_values = masked_array.max(axis=0)
            masked_array = ma.masked_where(masked_array == maximum_values,
                                           masked_array)
            cls.check_cancellation(session_controller)
            console.message("Masked maximums.", +1, temp=True)
            console.pop_level()

        console.message(f"Calculating mean of remaining data.", 0)
        masked_means = numpy.mean(masked_array, axis=0)
        cls.check_cancellation(session_controller)
        # If the means matrix contains any masked values, that means that in that column the clipping
        # eliminated *all* the data.  We will find the offending columns and re-calculate those with
        # fewer dropped extremes.  This should exactly reproduce the results of the cell-by-cell methods
        if ma.is_masked(masked_means):
            console.message(
                "Some columns lost all their values; reducing drops for those columns.",
                0)
            #  Get the mask, and get a 2D matrix showing which columns were entirely masked
            the_mask = masked_array.mask
            eliminated_columns_map = ndarray.all(the_mask, axis=0)
            masked_coordinates = numpy.where(eliminated_columns_map)
            cls.check_cancellation(session_controller)
            x_coordinates = masked_coordinates[0]
            y_coordinates = masked_coordinates[1]
            assert len(x_coordinates) == len(y_coordinates)
            repairs = len(x_coordinates)
            cp = "s" if repairs > 1 else ""
            np = "" if repairs > 1 else "s"
            console.message(f"{repairs} column{cp} need{np} repair.", +1)
            for index in range(repairs):
                cls.check_cancellation(session_controller)
                # print(".", end="\n" if (index > 0) and (index % 50 == 0) else "")
                column_x = x_coordinates[index]
                column_y = y_coordinates[index]
                column = file_data[:, column_x, column_y]
                min_max_clipped_mean: int = round(
                    cls.calc_mm_clipped_mean(column, number_dropped_values - 1,
                                             console, session_controller))
                masked_means[column_x, column_y] = min_max_clipped_mean
            # We've replaced the problematic columns, now the mean should calculate cleanly
            assert not ma.is_masked(masked_means)
        console.pop_level()
        return masked_means.round()
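
The core min-max clipping idea, reduced to a stand-alone numpy.ma sketch on a synthetic stack (no Console or SessionController needed):

import numpy as np
import numpy.ma as ma

stack = ma.MaskedArray(np.random.rand(5, 8, 8))   # 5 frames of 8x8 pixels

# One drop iteration: mask every occurrence of the per-pixel minimum and maximum.
stack = ma.masked_where(stack == stack.min(axis=0), stack)
stack = ma.masked_where(stack == stack.max(axis=0), stack)

clipped_mean = stack.mean(axis=0)
print(ma.is_masked(clipped_mean))  # True only if some pixel lost all of its values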
Esempio n. 42
0
def pearsonr(cube_a,
             cube_b,
             corr_coords=None,
             weights=None,
             mdtol=1.,
             common_mask=False):
    """
    Calculate the Pearson's r correlation coefficient over specified
    dimensions.

    Args:

    * cube_a, cube_b (cubes):
        Cubes between which the correlation will be calculated.  The cubes
        should either be the same shape and have the same dimension coordinates
        or one cube should be broadcastable to the other.
    * corr_coords (str or list of str):
        The cube coordinate name(s) over which to calculate correlations. If no
        names are provided then correlation will be calculated over all common
        cube dimensions.
    * weights (numpy.ndarray, optional):
        Weights array of same shape as (the smaller of) cube_a and cube_b. Note
        that latitude/longitude area weights can be calculated using
        :func:`iris.analysis.cartography.area_weights`.
    * mdtol (float, optional):
        Tolerance of missing data. The missing data fraction is calculated
        based on the number of grid cells masked in both cube_a and cube_b. If
        this fraction exceeds mdtol, the returned value in the corresponding
        cell is masked. mdtol=0 means no missing data is tolerated while
        mdtol=1 means the resulting element will be masked if and only if all
        contributing elements are masked in cube_a or cube_b. Defaults to 1.
    * common_mask (bool):
        If True, applies a common mask to cube_a and cube_b so only cells which
        are unmasked in both cubes contribute to the calculation. If False, the
        variance for each cube is calculated from all available cells. Defaults
        to False.

    Returns:
        A cube of the correlation between the two input cubes along the
        specified dimensions, at each point in the remaining dimensions of the
        cubes.

        For example providing two time/altitude/latitude/longitude cubes and
        corr_coords of 'latitude' and 'longitude' will result in a
        time/altitude cube describing the latitude/longitude (i.e. pattern)
        correlation at each time/altitude point.

    Reference:
        http://www.statsoft.com/textbook/glosp.html#Pearson%20Correlation

    This operation is non-lazy.

    """

    # Assign larger cube to cube_1
    if cube_b.ndim > cube_a.ndim:
        cube_1 = cube_b
        cube_2 = cube_a
    else:
        cube_1 = cube_a
        cube_2 = cube_b

    dim_coords_1 = [coord.name() for coord in cube_1.dim_coords]
    dim_coords_2 = [coord.name() for coord in cube_2.dim_coords]
    common_dim_coords = list(set(dim_coords_1) & set(dim_coords_2))
    # If no coords passed then set to all common dimcoords of cubes.
    if corr_coords is None:
        corr_coords = common_dim_coords

    smaller_shape = cube_2.shape

    # Match up data masks if required.
    if common_mask:
        # Create a cube of 1's with a common mask.
        if ma.is_masked(cube_2.data):
            mask_cube = _ones_like(cube_2)
        else:
            mask_cube = 1.
        if ma.is_masked(cube_1.data):
            # Take a slice to avoid unnecessary broadcasting of cube_2.
            slice_coords = [
                dim_coords_1[i] for i in range(cube_1.ndim)
                if dim_coords_1[i] not in common_dim_coords and np.array_equal(
                    cube_1.data.mask.any(axis=i), cube_1.data.mask.all(axis=i))
            ]
            cube_1_slice = next(cube_1.slices_over(slice_coords))
            mask_cube = _ones_like(cube_1_slice) * mask_cube
        # Apply common mask to data.
        if isinstance(mask_cube, iris.cube.Cube):
            cube_1 = cube_1 * mask_cube
            cube_2 = mask_cube * cube_2
            dim_coords_2 = [coord.name() for coord in cube_2.dim_coords]

    # Broadcast weights to shape of cubes if necessary.
    if weights is None or cube_1.shape == smaller_shape:
        weights_1 = weights
        weights_2 = weights
    else:
        if weights.shape != smaller_shape:
            raise ValueError("weights array should have dimensions {}".format(
                smaller_shape))

        dims_1_common = [
            i for i in range(cube_1.ndim)
            if dim_coords_1[i] in common_dim_coords
        ]
        weights_1 = broadcast_to_shape(weights, cube_1.shape, dims_1_common)
        if cube_2.shape != smaller_shape:
            dims_2_common = [
                i for i in range(cube_2.ndim)
                if dim_coords_2[i] in common_dim_coords
            ]
            weights_2 = broadcast_to_shape(weights, cube_2.shape,
                                           dims_2_common)
        else:
            weights_2 = weights

    # Calculate correlations.
    s1 = cube_1 - cube_1.collapsed(
        corr_coords, iris.analysis.MEAN, weights=weights_1)
    s2 = cube_2 - cube_2.collapsed(
        corr_coords, iris.analysis.MEAN, weights=weights_2)

    covar = (s1 * s2).collapsed(corr_coords,
                                iris.analysis.SUM,
                                weights=weights_1,
                                mdtol=mdtol)
    var_1 = (s1**2).collapsed(corr_coords,
                              iris.analysis.SUM,
                              weights=weights_1)
    var_2 = (s2**2).collapsed(corr_coords,
                              iris.analysis.SUM,
                              weights=weights_2)

    denom = iris.analysis.maths.apply_ufunc(np.sqrt,
                                            var_1 * var_2,
                                            new_unit=covar.units)
    corr_cube = covar / denom
    corr_cube.rename("Pearson's r")

    return corr_cube
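
A principle-only sketch of a masked-aware Pearson r over one axis with a common mask, using random data and plain numpy.ma; it mirrors the arithmetic above, not the iris cube API:

import numpy as np
import numpy.ma as ma

a = ma.masked_invalid(np.random.rand(4, 10))
b = ma.masked_invalid(np.random.rand(4, 10))
a[0, 3] = ma.masked                      # pretend one cell is missing

common = ma.getmaskarray(a) | ma.getmaskarray(b)
a = ma.masked_array(a, mask=common)
b = ma.masked_array(b, mask=common)

s1 = a - a.mean(axis=-1)[:, None]
s2 = b - b.mean(axis=-1)[:, None]
r = (s1 * s2).sum(axis=-1) / ma.sqrt((s1 ** 2).sum(axis=-1) * (s2 ** 2).sum(axis=-1))
print(r.shape)                           # (4,): one correlation per row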
Esempio n. 43
0
def write_llh_to_gdal(llh_data,
                      lon_min,
                      dlon,
                      lat_min,
                      dlat,
                      gdal_format,
                      dst_filename,
                      origin_up=True,
                      options=None,
                      nodata_value=None,
                      vflip_data=False):
    """Write an LLH layer to a GIS file in a gdal supported format.

    vflip_data: if True llh_data => llh_data[::-1,:]. Use in case the data
    is not aligned with the desired geotransform.
    """

    gdal_type = get_gdal_type(llh_data.dtype)

    # Get the driver and open the output file

    driver = gdal.GetDriverByName(gdal_format)
    if driver is None:
        raise Exception('Unimplemented gdal driver: %s' % gdal_format)

    dst_ds = driver.Create(dst_filename,
                           llh_data.shape[1],
                           llh_data.shape[0],
                           bands=1,
                           eType=gdal_type)  #, options=options )

    # Flip the data if needed to be consistent with the geotransform

    if vflip_data:
        llh_data = llh_data[::-1, :]

    # Set all of the transform information

    if origin_up:
        nlat = llh_data.shape[0]
        lat_max = lat_min + (nlat - 1) * dlat
        dst_ds.SetGeoTransform([lon_min, dlon, 0, lat_max, 0, -dlat])
    else:
        dst_ds.SetGeoTransform([lon_min, dlon, 0, lat_min, 0, dlat])
    srs = osr.SpatialReference()
    srs.SetWellKnownGeogCS('WGS84')
    dst_ds.SetProjection(srs.ExportToWkt())

    # Now write the raster

    band = dst_ds.GetRasterBand(1)

    if nodata_value is not None:
        band.SetNoDataValue(nodata_value)

    if is_masked(llh_data):
        if nodata_value is not None:
            llh_data.data[llh_data.mask] = nodata_value
        band.WriteArray(llh_data.data)
    else:
        band.WriteArray(llh_data)

    # Clean up by closing the dataset

    dst_ds = None
    src_ds = None
Esempio n. 44
0
#...

#main cycle of averaging to bins
for i,x in enumerate(Xbins[:-1]):
    for j,y in enumerate(Ybins[:-1]):

        SUM = []
        for line in np.arange(len(LINES_subset)):
            LONindices = (LON_subset[line] > x) & (LON_subset[line] <= Xbins[i+1])
            LATindices = (LAT_subset[line] > y) & (LAT_subset[line] <= Ybins[j+1])
            DATAindices = np.logical_and(LONindices,LATindices)

            if np.any(DATAindices):
                local_mean = ma.mean(DATA_subset[line][DATAindices])
                if ma.is_masked(local_mean):
                    pass
                else:
                    SUM.append(float(local_mean))
        if len(SUM)==0:
            AVG[i,j] = 1e+20
        else:
            AVG[i,j] = np.mean(SUM)




#...
#set mask
AVG = ma.masked_where(AVG>=1e+19,AVG)
#XC = ma.masked_where(AVG>=1e+19,XC)
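
A short demonstration of why the loop above checks the bin mean with ma.is_masked: averaging a fully masked selection yields the masked constant rather than a number (synthetic data):

import numpy as np
import numpy.ma as ma

data = ma.masked_greater(np.array([1.0, 2.0, 9.0, 9.0]), 5.0)

print(ma.mean(data[:2]))                 # 1.5 -> a usable bin average
print(ma.is_masked(ma.mean(data[2:])))   # True -> every value in that bin was masked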
Esempio n. 45
0
def export_cbc(model,
               cbcfile,
               otfolder,
               precision='single',
               nanval=-1e+20,
               kstpkper=None,
               text=None,
               smooth=False,
               point_scalars=False,
               binary=False):
    """

    Exports cell by cell file to vtk

    Parameters
    ----------

    model : flopy model instance
        the flopy model instance
    cbcfile : str
        the cell by cell file
    otfolder : str
        output folder to write the data
    precision : str:
        binary file precision, default is 'single'
    nanval : scalar
        no data value
    kstpkper : tuple of ints or list of tuple of ints
        A tuple containing the time step and stress period (kstp, kper).
        The kstp and kper values are zero based.
    text : str or list of str
        The text identifier for the record.  Examples include
        'RIVER LEAKAGE', 'STORAGE', 'FLOW RIGHT FACE', etc.
    smooth : bool
        If true a smooth surface will be output, default is False
    point_scalars : bool
        If True point scalar values will be written, default is False
    binary : bool
        if True the output .vtu file will be binary, default is
        False.

    """

    mg = model.modelgrid
    shape = (mg.nlay, mg.nrow, mg.ncol)

    if not os.path.exists(otfolder):
        os.mkdir(otfolder)

    # set up the pvd file to make the output files time enabled
    pvdfile = open(os.path.join(otfolder, '{}_Heads.pvd'.format(model.name)),
                   'w')

    pvdfile.write("""<?xml version="1.0"?>
<VTKFile type="Collection" version="0.1"
         byte_order="LittleEndian"
         compressor="vtkZLibDataCompressor">
  <Collection>\n""")

    # load cbc

    cbb = bf.CellBudgetFile(cbcfile, precision=precision)

    # totim_dict = dict(zip(cbb.get_kstpkper(), model.dis.get_totim()))

    # get records
    records = _get_names(cbb.get_unique_record_names())

    # build imeth lookup
    imeth_dict = {
        record: imeth
        for (record, imeth) in zip(records, cbb.imethlist)
    }
    # get list of packages to export
    if text is not None:
        # build keylist
        if isinstance(text, str):
            keylist = [text]
        elif isinstance(text, list):
            keylist = text
        else:
            raise Exception('text must be type str or list of str')
    else:
        keylist = records

    if kstpkper is not None:
        if isinstance(kstpkper, tuple):
            kstplist = [kstpkper[0]]
            kperlist = [kstpkper[1]]
        elif isinstance(kstpkper, list):
            kstpkper_list = list(map(list, zip(*kstpkper)))
            kstplist = kstpkper_list[0]
            kperlist = kstpkper_list[1]

        else:
            raise Exception('kstpkper must be tuple of (kstp, kper) or list '
                            'of tuples')

    else:
        kperlist = list(set([x[1] for x in cbb.get_kstpkper() if x[1] > -1]))
        kstplist = list(set([x[0] for x in cbb.get_kstpkper() if x[0] > -1]))

    # get model name
    model_name = model.name

    vtk = Vtk(model, nanval=nanval, smooth=smooth, point_scalars=point_scalars)

    # export data
    addarray = False
    count = 1
    for kper in kperlist:
        for kstp in kstplist:

            ot_base = '{}_CBC_KPER{}_KSTP{}.vtu'.format(
                model_name, kper + 1, kstp + 1)
            otfile = os.path.join(otfolder, ot_base)
            pvdfile.write("""<DataSet timestep="{}" group="" part="0"
                         file="{}"/>\n""".format(count, ot_base))
            for name in keylist:

                try:
                    rec = cbb.get_data(kstpkper=(kstp, kper),
                                       text=name,
                                       full3D=True)

                    if len(rec) > 0:
                        array = rec[0]  # need to fix for multiple pak
                        addarray = True

                except ValueError:

                    rec = cbb.get_data(kstpkper=(kstp, kper), text=name)[0]

                    if imeth_dict[name] == 6:
                        array = np.full(shape, nanval)
                        # rec array
                        for [node, q] in zip(rec['node'], rec['q']):
                            lyr, row, col = np.unravel_index(node - 1, shape)

                            array[lyr, row, col] = q

                        addarray = True
                    else:
                        raise Exception('Data type not currently supported '
                                        'for cbc output')
                        # print('Data type not currently supported '
                        #       'for cbc output')

                if addarray:

                    # set the data to no data value
                    if ma.is_masked(array):
                        array = np.where(array.mask, nanval, array)

                    # add array to vtk
                    vtk.add_array(name.strip(), array)  # need to adjust for

            # write the vtk data to the output file
            if binary:
                vtk.write_binary(otfile)
            else:
                vtk.write(otfile)
            count += 1
    # finish writing the pvd file
    pvdfile.write("""  </Collection>
</VTKFile>""")

    pvdfile.close()
    return
Esempio n. 46
0
def ingest_sources(db,
                   sources,
                   references=None,
                   ras=None,
                   decs=None,
                   comments=None,
                   epochs=None,
                   equinoxes=None,
                   raise_error=True,
                   search_db=True):
    """
    Script to ingest sources
    TODO: better support references=None
    Parameters
    ----------
    db: astrodbkit2.astrodb.Database
        Database object created by astrodbkit2
    sources: list[str]
        Names of sources
    references: str or list[strings]
        Discovery references of sources
    ras: list[floats], optional
        Right ascensions of sources. Decimal degrees.
    decs: list[floats], optional
        Declinations of sources. Decimal degrees.
    comments: list[strings], optional
        Comments
    epochs: str or list[str], optional
        Epochs of coordinates
    equinoxes: str or list[string], optional
        Equinoxes of coordinates
    raise_error: bool, optional
        True (default): Raise an error if a source cannot be ingested
        False: Log a warning but skip sources which cannot be ingested
    search_db: bool, optional
        True (default): Search database to see if source is already ingested
        False: Ingest source without searching the database

    Returns
    -------

    None

    """
    # TODO: add example

    # SETUP INPUTS
    if ras is None and decs is None:
        coords = False
    else:
        coords = True

    if isinstance(sources, str):
        n_sources = 1
    else:
        n_sources = len(sources)

    # Convert single element input values into lists
    input_values = [sources, references, epochs, equinoxes, comments]
    for i, input_value in enumerate(input_values):
        if input_value is None:
            input_values[i] = [None] * n_sources
        elif isinstance(input_value, str):
            input_values[i] = [input_value] * n_sources
    sources, references, epochs, equinoxes, comments = input_values

    n_added = 0
    n_existing = 0
    n_names = 0
    n_alt_names = 0
    n_skipped = 0
    n_multiples = 0

    logger.info(f"Trying to add {n_sources} sources")

    # Loop over each source and decide to ingest, skip, or add alt name
    for i, source in enumerate(sources):
        # Find out if source is already in database or not
        if coords and search_db:
            name_matches = find_source_in_db(db,
                                             source,
                                             ra=ras[i],
                                             dec=decs[i])
        elif search_db:
            name_matches = find_source_in_db(db, source)
        elif not search_db:
            name_matches = []
        else:
            name_matches = None

        if len(name_matches
               ) == 1 and search_db:  # Source is already in database
            n_existing += 1
            msg1 = f"{i}: Skipping {source}. Already in database. \n "
            msg2 = f"{i}: Match found for {source}: {name_matches[0]}"
            logger.debug(msg1 + msg2)

            # Figure out if ingest name is an alternate name and add
            db_matches = db.search_object(source,
                                          output_table='Sources',
                                          fuzzy_search=False)
            if len(db_matches) == 0:
                alt_names_data = [{
                    'source': name_matches[0],
                    'other_name': source
                }]
                try:
                    db.Names.insert().execute(alt_names_data)
                    logger.debug(
                        f"{i}: Name added to database: {alt_names_data}\n")
                    n_alt_names += 1
                except sqlalchemy.exc.IntegrityError as e:
                    msg = f"{i}: Could not add {alt_names_data} to database"
                    logger.warning(msg)
                    if raise_error:
                        raise SimpleError(msg + '\n' + str(e))
                    else:
                        continue
            continue  # Source is already in database, nothing new to ingest
        elif len(
                name_matches
        ) > 1 and search_db:  # Multiple source matches in the database
            n_multiples += 1
            msg1 = f"{i} Skipping {source} "
            msg = f"{i} More than one match for {source}\n {name_matches}\n"
            logger.warning(msg1 + msg)
            if raise_error:
                raise SimpleError(msg)
            else:
                continue
        elif len(name_matches
                 ) == 0 or not search_db:  # No match in the database, INGEST!
            if coords:  # Coordinates were provided as input
                ra = ras[i]
                dec = decs[i]
                epoch = None if ma.is_masked(epochs[i]) else epochs[i]
                equinox = None if ma.is_masked(equinoxes[i]) else equinoxes[i]
            else:  # Try to get coordinates from SIMBAD
                simbad_result_table = Simbad.query_object(source)
                if simbad_result_table is None:
                    n_skipped += 1
                    msg = f"{i}: Skipping: {source}. Coordinates are needed and could not be retrieved from SIMBAD. \n"
                    logger.warning(msg)
                    if raise_error:
                        raise SimpleError(msg)
                    else:
                        continue
                elif len(simbad_result_table) == 1:
                    simbad_coords = simbad_result_table['RA'][
                        0] + ' ' + simbad_result_table['DEC'][0]
                    simbad_skycoord = SkyCoord(simbad_coords,
                                               unit=(u.hourangle, u.deg))
                    ra = simbad_skycoord.to_string(style='decimal').split()[0]
                    dec = simbad_skycoord.to_string(style='decimal').split()[1]
                    epoch = '2000'  # Default coordinates from SIMBAD are epoch 2000.
                    equinox = 'J2000'  # Default frame from SIMBAD is ICRS and J2000.
                    msg = f"Coordinates retrieved from SIMBAD {ra}, {dec}"
                    logger.debug(msg)
                else:
                    n_skipped += 1
                    msg = f"{i}: Skipping: {source}. Coordinates are needed and could not be retrieved from SIMBAD. \n"
                    logger.warning(msg)
                    if raise_error:
                        raise SimpleError(msg)
                    else:
                        continue

            logger.debug(f"{i}: Ingesting {source}. Not already in database. ")
        else:
            msg = f"{i}: unexpected condition encountered ingesting {source}"
            logger.error(msg)
            raise SimpleError(msg)

        # Construct data to be added
        source_data = [{
            'source':
            source,
            'ra':
            ra,
            'dec':
            dec,
            'reference':
            references[i],
            'epoch':
            epoch,
            'equinox':
            equinox,
            'comments':
            None if ma.is_masked(comments[i]) else comments[i]
        }]
        names_data = [{'source': source, 'other_name': source}]

        # Try to add the source to the database
        try:
            db.Sources.insert().execute(source_data)
            n_added += 1
            msg = f"Added {str(source_data)}"
            logger.debug(msg)
        except sqlalchemy.exc.IntegrityError:
            if ma.is_masked(source_data[0]
                            ['reference']):  # check if reference is blank
                msg = f"{i}: Skipping: {source}. Discovery reference is blank. \n"
                msg2 = f"\n {str(source_data)}\n"
                logger.warning(msg)
                logger.debug(msg2)
                n_skipped += 1
                if raise_error:
                    raise SimpleError(msg + msg2)
                else:
                    continue
            elif db.query(db.Publications).filter(
                    db.Publications.c.name == references[i]).count() == 0:
                # check if reference is in Publications table
                msg = f"{i}: Skipping: {source}. Discovery reference {references[i]} is not in Publications table. \n" \
                      f"(Add it with add_publication function.) \n "
                msg2 = f"\n {str(source_data)}\n"
                logger.warning(msg)
                logger.debug(msg2)
                n_skipped += 1
                if raise_error:
                    raise SimpleError(msg + msg2)
                else:
                    continue
            else:
                msg = f"{i}: Skipping: {source}. Not sure why."
                msg2 = f"\n {str(source_data)} "
                logger.warning(msg)
                logger.debug(msg2)
                n_skipped += 1
                if raise_error:
                    raise SimpleError(msg + msg2)
                else:
                    continue

        # Try to add the source name to the Names table
        try:
            db.Names.insert().execute(names_data)
            logger.debug(f"Name added to database: {names_data}\n")
            n_names += 1
        except sqlalchemy.exc.IntegrityError:
            msg = f"{i}: Could not add {names_data} to database"
            logger.warning(msg)
            if raise_error:
                raise SimpleError(msg)
            else:
                continue

    logger.info(f"Sources added to database: {n_added}")
    logger.info(f"Names added to database: {n_names} \n")
    logger.info(f"Sources already in database: {n_existing}")
    logger.info(f"Alt Names added to database: {n_alt_names}")
    logger.info(
        f"Sources NOT added to database because multiple matches: {n_multiples}"
    )
    logger.info(f"Sources NOT added to database: {n_skipped} \n")

    if n_added != n_names:
        msg = f"Number added should equal names added."
        raise SimpleError(msg)

    if n_added + n_existing + n_multiples + n_skipped != n_sources:
        msg = f"Number added + Number skipped doesn't add up to total sources"
        raise SimpleError(msg)

    return
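
A tiny sketch of the "None if masked" pattern used when building the source_data records, with a plain numpy.ma array standing in for masked astropy table cells:

import numpy.ma as ma

comments = ma.masked_array(['first entry', ''], mask=[False, True])

records = [{'comments': None if ma.is_masked(c) else str(c)} for c in comments]
print(records)   # [{'comments': 'first entry'}, {'comments': None}]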
Esempio n. 47
0
    plt.tight_layout()
    plt.show()
    sys.exit()

dataH, dataW = deltaValues.shape
pixelData = np.zeros((dataH, dataW, 4), dtype=np.uint8)
pixelDataGain = np.zeros((dataH, dataW, 4), dtype=np.uint8)
pixelDataLoss = np.zeros((dataH, dataW, 4), dtype=np.uint8)

total = dataH * dataW
print("Converting data to colors...")
for i in range(dataH):
    for j in range(dataW):
        delta = deltaValues[i, j]
        color = gainColor = lossColor = [0,0,0,0]
        if not ma.is_masked(delta):
            nvalue = norm(delta, DATA_RANGE, limit=True)
            color = getColor(COLOR_GRADIENT, nvalue)
            if nvalue > 0.5:
                gainColor = color
            else:
                lossColor = color
        pixelData[i, j] = np.array(color, dtype=np.uint8)
        pixelDataGain[i, j] = np.array(gainColor, dtype=np.uint8)
        pixelDataLoss[i, j] = np.array(lossColor, dtype=np.uint8)
        printProgress(i*dataW+j, total)

def makeImage(filename, pixelData):
    print("Writing data to image...")
    dataIm = Image.fromarray(pixelData, mode="RGBA")
    baseIm = Image.new(mode="RGBA", size=dataIm.size, color=(0, 0, 0, 255))
Esempio n. 48
0
            try:
                s_reference_stream.trim(starttime=s_times[s_stations.index(values[parameters.index(
                    'reference_station')])] - datetime.timedelta(seconds=5),
                                      endtime=s_times[s_stations.index(values[parameters.index(
                                          'reference_station')])] + datetime.timedelta(seconds=5))
            except ValueError:
                # When there is no s data for the station, use the minimum and maximum from the other sites
                s_reference_stream.trim(starttime=min(s_times) - datetime.timedelta(seconds=5),
                                        endtime=max(s_times) + datetime.timedelta(seconds=5))
            lag_time = find_lag_time(s_station_stream, s_reference_stream)
            shift_idx = int(abs(lag_time * streams[m][0].stats.sampling_rate))
            downsampled_shift_idx = int(abs(lag_time * downsampled_streams[m][0].stats.sampling_rate))
            for n in range(len(streams[m])):

                # Ensure all data are masked arrays
                if not ma.is_masked(streams[m][n].data):
                    streams[m][n].data = ma.masked_array(streams[m][n].data)
                if not ma.is_masked(downsampled_streams[m][n].data):
                    downsampled_streams[m][n].data = ma.masked_array(downsampled_streams[m][n].data)

                # Apply shift
                if lag_time > 0:
                    nandices[0] = downsampled_shift_idx
                    shifted_streams[m][n].data = np.asarray([float('nan')] * shift_idx +
                                                            streams[m][n].data[:-shift_idx].filled(
                                                                float('nan')).tolist())
                    shifted_downsampled_streams[m][n].data = np.asarray([float('nan')] * downsampled_shift_idx +
                                                            downsampled_streams[m][n].data[
                                                            :-downsampled_shift_idx].filled(float('nan')).tolist())
                else:
                    nandices[1] = len(downsampled_streams[m][n].data) - downsampled_shift_idx
Esempio n. 49
0
def chunk_shape_nD(varShape, valSize=4, chunkSize=4096, minDim=1):
    """
    Return a 'good shape' for an nD variable, assuming balanced 1D, 2D access

    varShape  -- list of variable dimension sizes
    valSize   -- size of each data value, in bytes (default 4)
    chunkSize -- minimum chunksize desired, in bytes (default 4096)
    minDim    -- minimum chunk dimension (if var dimension larger
                 than this value, otherwise it is just var dimension)

    Returns integer chunk lengths of a chunk shape that provides
    balanced access of 1D subsets and 2D subsets of a netCDF or HDF5
    variable var. 'Good shape' for chunks means that the number of
    chunks accessed to read any kind of 1D or 2D subset is approximately
    equal, and the size of each chunk (uncompressed) is at least
    chunkSize, which is often a disk block size.
    """

    varShapema = ma.array(varShape)

    chunkVals = min(chunkSize / float(valSize),
                    numVals(varShapema))  # ideal number of values in a chunk

    # Make an ideal chunk shape array
    chunkShape = ma.array(calcChunkShape(chunkVals, varShapema), dtype=int)

    # Short circuit for 1D arrays. Logic below unnecessary & can have divide by zero
    if len(varShapema) == 1: return chunkShape.filled(fill_value=1)

    # And a copy where we'll store our final values
    chunkShapeFinal = ma.masked_all(chunkShape.shape, dtype=int)

    if chunkVals < numVals(np.minimum(varShapema, minDim)):
        while chunkVals < numVals(np.minimum(varShapema, minDim)):
            minDim -= 1
        sys.stderr.write('Mindim too large for variable, reduced to : %d\n' %
                         minDim)

    lastChunkCount = -1

    while True:

        # Loop over the axes in chunkShape, making sure they are at
        # least minDim in length.
        for i in range(len(chunkShape)):
            if ma.is_masked(chunkShape[i]):
                continue
            if (chunkShape[i] < minDim):
                # Set the final chunk shape for this dimension
                chunkShapeFinal[i] = min(minDim, varShapema[i])
                # mask it out of the array of possible chunkShapes
                chunkShape[i] = ma.masked

        # Have we fixed any dimensions and filled them in chunkShapeFinal?
        if chunkShapeFinal.count() > 0:
            chunkCount = numVals(chunkShapeFinal[~chunkShapeFinal.mask])
        else:
            if (lastChunkCount == -1):
                # Haven't modified initial guess, break out of
                # this loop and accept chunkShape
                break

        if chunkCount != lastChunkCount and len(
                varShapema[~chunkShape.mask]) > 0:
            # Recalculate chunkShape array, with reduced dimensions
            chunkShape[~chunkShape.mask] = calcChunkShape(
                chunkVals / chunkCount, varShapema[~chunkShape.mask])
            lastChunkCount = chunkCount
        else:
            break

    # This doesn't work when chunkShape has no masked values. Weird.
    # chunkShapeFinal[chunkShapeFinal.mask] = chunkShape[~chunkShape.mask]
    for i in range(len(chunkShapeFinal)):
        if ma.is_masked(chunkShapeFinal[i]):
            chunkShapeFinal[i] = chunkShape[i]

    return chunkShapeFinal.filled(fill_value=1)
Esempio n. 50
0
def find_xcorr_window(shifted_downsampled_streams, downsampled_rss, nandices,
                      phase, parameters, values):
    """
    Find the time window of each event for which the correlation of the vertical waveforms
    at the sensors is highest: This window is that for which the normalised cross-correlation of the
    total energy traces of each sensor pair is highest.
    """

    xcorr_window = []

    # Calculate horizontal total energy for the reference stream
    reference_total_energy_waveform = calculate_total_energy(
        downsampled_rss, phase)
    reference_total_energy_waveform = np.asarray(
        smooth_data(
            reference_total_energy_waveform,
            int(2 * round(
                1 / float(values[parameters.index('lower_frequency')])))))
    for m in range(len(shifted_downsampled_streams)):
        # Calculate horizontal total energy for the station stream
        downsampled_sss = shifted_downsampled_streams[m].copy()
        shifted_stream_total_energy_waveform = calculate_total_energy(
            downsampled_sss, phase)
        shifted_stream_total_energy_waveform = np.asarray(
            smooth_data(
                shifted_stream_total_energy_waveform,
                int(2 * round(
                    1 / float(values[parameters.index('lower_frequency')])))))

        # Initiate one loop to work through each possible start time in the waveform
        normalised_xcorr_values = [
            ([0] * len(reference_total_energy_waveform))
            for y in range(len(shifted_stream_total_energy_waveform))
        ]
        for n in range(nandices[0], len(shifted_stream_total_energy_waveform)):
            if nandices[1] and n > nandices[
                    1]:  # Don't do cross-correlation past the data
                break
            # Initiate a second loop to work through each possible end time in the waveform,
            # so that all possible windows are tested, BUT require that windows are at least 1 wavelength of the
            # lowest frequency wavelet in length.
            for o in range(
                    n + int(
                        round(1 / float(
                            values[parameters.index('lower_frequency')]))) *
                    int(shifted_downsampled_streams[m][0].stats.sampling_rate)
                    + 1, len(reference_total_energy_waveform)):
                if nandices[1] and o > nandices[
                        1]:  # Don't do cross-correlation past the data
                    break
                # Calculate mean, variance for data in the given window
                x_mean = np.nanmean(shifted_stream_total_energy_waveform[n:o])
                y_mean = np.nanmean(reference_total_energy_waveform[n:o])
                x_var = np.nanvar(shifted_stream_total_energy_waveform[n:o])
                y_var = np.nanvar(reference_total_energy_waveform[n:o])
                if x_var == 0 or y_var == 0:
                    continue
                if np.isnan(x_mean) or np.isnan(y_mean) or np.isnan(
                        x_var) or np.isnan(y_var):
                    continue
                # Iterate through all values in the given window
                sum = 0
                for p in range(n, o):
                    # Skip this window if there are any nan values
                    if (np.isnan(shifted_stream_total_energy_waveform[p])
                            or ma.is_masked(
                                shifted_stream_total_energy_waveform[p])
                            or np.isnan(reference_total_energy_waveform[p]) or
                            ma.is_masked(reference_total_energy_waveform[p])):
                        break
                    sum += (
                        (shifted_stream_total_energy_waveform[p] - x_mean) *
                        (reference_total_energy_waveform[p] - y_mean))
                normalised_xcorr_value = (
                    1 / len(shifted_stream_total_energy_waveform[n:o]) * sum /
                    math.sqrt(x_var * y_var))
                # Store normalised cross-correlation values in a nested list where the first index is the window
                # start index and the second index is the window end index.
                normalised_xcorr_values[n][o] = normalised_xcorr_value
        if np.nanmax(normalised_xcorr_values) == 0:
            print(
                'Seismograms failed to find any suitable correlation window!')
            return np.nan
        normalised_xcorr_values = np.asarray(normalised_xcorr_values)
        max_normalised_xcorr_value_idx = np.unravel_index(
            np.nanargmax(normalised_xcorr_values),
            normalised_xcorr_values.shape)
        print(
            'For station ' + shifted_downsampled_streams[m][0].stats.station +
            ' maximum normalised cross-correlation value occurs between times '
            + str(downsampled_sss[0].times(
                type='utcdatetime')[max_normalised_xcorr_value_idx[0]]) +
            ' (index ' + str(max_normalised_xcorr_value_idx[0]) +
            ' in downsampled data) - ' + str(downsampled_sss[0].times(
                type='utcdatetime')[max_normalised_xcorr_value_idx[1]]) +
            ' in the aligned data (index ' +
            str(max_normalised_xcorr_value_idx[1]) +
            ' in the downsampled data)')
        print('Maximum cross-correlation value is: ' +
              str(normalised_xcorr_values[max_normalised_xcorr_value_idx]))
        if nandices[0] > max_normalised_xcorr_value_idx[0]:
            print(
                'There are ' +
                str(nandices[0] - max_normalised_xcorr_value_idx[0]) +
                ' NaN values at the front of the cross-correlation window in the downsampled aligned data'
            )
        if nandices[1] and nandices[1] < max_normalised_xcorr_value_idx[1]:
            print(
                'There are ' +
                str(max_normalised_xcorr_value_idx[1] - nandices[1]) +
                ' NaN values at the end of the cross-correlation window in the downsampled aligned data'
            )
        xcorr_window.append([
            downsampled_sss[0].times(
                type='utcdatetime')[max_normalised_xcorr_value_idx[0]],
            downsampled_sss[0].times(
                type='utcdatetime')[max_normalised_xcorr_value_idx[1]]
        ])

    return xcorr_window
Esempio n. 51
0
trackLen_polar_nh = []
trackLen_polar_sh = []

for x in range(0, niters):
    #for x in range(0,60000):
    if x % 10000 == 0:
        print("On track {0}/{1}".format(x, niters))

    if x < len_nh:

        if trackLen_nh[
                x] < 8:  # checking to make sure TPV track was longer than two days
            continue

        lat_nh = data_nh.variables['latExtr'][x, :]
        if not ma.is_masked(lat_nh):
            per_life_in_arctic = float(
                np.where(lat_nh >= 60)[0].shape[0]) / float(
                    lat_nh.shape[0]
                )  # checking if TPV spent 60% of lifetime in Arctic
        else:
            per_life_in_arctic = float(
                np.where((lat_nh.data >= 60)
                         & (lat_nh.mask != True))[0].shape[0]) / float(
                             np.where((lat_nh.mask != True))[0].shape[0])

        if per_life_in_arctic * 100 >= 60.:
            lifetimes_polar_nh.append(data_nh.variables['lenTrack'][x] / hinc)
            months_polar_nh.append(trackStartMon_nh[x])
            trackLen_polar_nh.append(data_nh.variables['lenTrack'][x])
Esempio n. 52
0
def find_lag_time(stream, reference_stream, phase):
    """
    Find the lag time between a given stream and a reference stream using cross-correlation of the
    total energy, which is independent of component alignment.
    :param stream: obspy stream object of the seismogram to calculate lag time for
    :param reference_stream: obspy stream object of the seismogram to use as reference for lag time calculation
    :param phase: whether to use total energy on vertical channel (P phase) or horizontal channels (S phase)
    :return: lag time between the two sensors in seconds relative to the reference stream
    """

    # Create normalised amplitude envelopes of the data using horizontal total energy
    stream_envelope = calculate_total_energy(stream, phase)
    max_se = ma.max(stream_envelope)
    stream_envelope /= max_se
    ref_envelope = calculate_total_energy(reference_stream, phase)
    max_re = ma.max(ref_envelope)
    ref_envelope /= max_re

    # Ensure all data are masked arrays
    if not ma.is_masked(stream_envelope):
        stream_envelope = ma.masked_array(stream_envelope)
    if not ma.is_masked(ref_envelope):
        ref_envelope = ma.masked_array(ref_envelope)

    # Find the lag time from the maximum cross-correlation value between the two waveforms
    xcorr_values = []
    ref_envelope = ref_envelope.filled(0).tolist() + len(stream_envelope) * [0]
    ref_envelope = np.asarray(
        smooth_data(
            ref_envelope,
            int(2 * round(
                1 / float(values[parameters.index('lower_frequency')])))))
    for m in range(2 * len(stream_envelope)):

        # Shift the stream
        if m <= len(stream_envelope):
            shifted_stream_envelope = (len(stream_envelope) - m) * [
                0
            ] + stream_envelope.filled(0).tolist() + m * [0]
        else:
            shifted_stream_envelope = max(0, len(stream_envelope) - m) * [0] + \
                                      stream_envelope[:len(stream_envelope) - m].filled(0).tolist() + m * [0]
        shifted_stream_envelope = np.asarray(
            smooth_data(
                shifted_stream_envelope,
                int(2 * round(
                    1 / float(values[parameters.index('lower_frequency')])))))

        # Perform cross-correlation
        try:
            xcorr_value = np.corrcoef(shifted_stream_envelope,
                                      ref_envelope)[0][1]
        except ValueError:
            print(
                'Cross-correlation failed! Perhaps one stream is a data point different to the other? '
                'This can occur for certain corner frequency and data length combinations... It is a bug.'
            )
        xcorr_values.append(xcorr_value)

    # Find lag time from highest cross-correlation value
    max_xcorr_value = max(xcorr_values)
    print(
        'Correlation value at best alignment of total energy waveforms is: ' +
        str(max_xcorr_value))
    lag_time = (1 / stream[0].stats.sampling_rate) * (
        len(stream_envelope) - xcorr_values.index(max_xcorr_value))

    return lag_time
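For orientation, the shift-and-correlate idea used in find_lag_time can be reproduced on plain numpy arrays. This is only a sketch on synthetic data with an assumed sampling rate; it is not the project's API:

import numpy as np

rng = np.random.default_rng(0)
envelope = np.convolve(rng.random(200), np.ones(10) / 10, mode='same')  # smooth synthetic envelope
reference = np.roll(envelope, 25)                                       # the same envelope, delayed 25 samples

# Try every circular shift and keep the one with the highest correlation coefficient.
scores = [np.corrcoef(np.roll(envelope, s), reference)[0, 1] for s in range(len(envelope))]
best_shift = int(np.argmax(scores))
sampling_rate = 100.0                           # assumed samples per second
print(best_shift, best_shift / sampling_rate)   # 25 samples -> 0.25 s lag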
Example n. 53
0
def fit_histogram_gaussian_function(data_array, bin_width):
    """ This function fits a Gaussian function to a specific set 
    of data.

    Gaussian fitting is hard, this function exists as a port so that 
    all fitting functions use the same algorithm and said algorithm 
    is easy to change. This applies it to the histogram of the data.
    
    Parameters
    ----------
    data_array : ndarray
        The data that the histogram Gaussian function is fitting.
    bin_width : float
        The width of the bins to use for the histogram fitting 
        function.

    Returns
    -------
    gaussian_function : function
        A callable that, given an x value, returns the value of the 
        fitted Gaussian at that point.
    gaussian_parameters : dictionary
        A dictionary of all of the parameters of the Gaussian fit.
    """

    # Accept both masked arrays and standard arrays, and handle
    # each appropriately.
    if np_ma.is_masked(data_array):
        flat_data = data_array.compressed()
    else:
        flat_data = data_array.flatten()

    # Numpy does not support histogram bin widths, instead using
    # bins defined by values in an array. Converting equal bin
    # widths to this array.
    hist_bins = core.math.generate_numpy_bin_width_array(data_array=flat_data,
                                                         bin_width=bin_width)

    # Extract histogram data from the data. The x locations are in
    # the middle of the bin.
    hist_data = np.histogram(flat_data, bins=hist_bins)
    hist_x = (hist_data[1][0:-1] + hist_data[1][1:]) / 2
    hist_y = hist_data[0]

    # Determine the initial guesses of the Gaussian histogram
    # fit. So far magic is the best way.
    guess_mean, guess_stddev, guess_amplitude = \
        core.magic.magic_inital_gaussian_parameters(x_data=hist_x,
                                                    y_data=hist_y)

    # Do the Gaussian fit.
    inital_guesses = {
        'mean': guess_mean,
        'stddev': guess_stddev,
        'amplitude': guess_amplitude
    }
    gauss_funct, gauss_param = fit_gaussian_function(hist_x, hist_y,
                                                     inital_guesses)
    # For naming convention.
    gaussian_function = gauss_funct
    gaussian_parameters = gauss_param
    return gaussian_function, gaussian_parameters
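The helpers core.math, core.magic and fit_gaussian_function are not shown on this page. A rough standalone equivalent, assuming scipy is available and using curve_fit in place of the project's own fitter, might look like this:

import numpy as np
import numpy.ma as ma
from scipy.optimize import curve_fit

def gaussian(x, amplitude, mean, stddev):
    return amplitude * np.exp(-0.5 * ((x - mean) / stddev) ** 2)

rng = np.random.default_rng(1)
data = ma.masked_greater(rng.normal(5.0, 2.0, 10000), 12.0)

# Use only the valid values, mirroring the compressed()/flatten() branch above.
flat = data.compressed() if ma.is_masked(data) else np.ravel(data)

counts, edges = np.histogram(flat, bins=50)
centers = 0.5 * (edges[:-1] + edges[1:])

initial_guesses = [counts.max(), flat.mean(), flat.std()]   # rough amplitude, mean, stddev
params, _ = curve_fit(gaussian, centers, counts, p0=initial_guesses)
print(dict(zip(['amplitude', 'mean', 'stddev'], params)))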
Example n. 54
0
    def combine_sigma_clip(
            cls, file_names: [str], sigma_threshold: float,
            calibrator: Calibrator, console: Console,
            session_controller: SessionController) -> Optional[ndarray]:
        console.push_level()
        console.message(
            f"Combine by sigma-clipped mean, z-score threshold {sigma_threshold}",
            +1)
        sample_file = RmFitsUtil.make_file_descriptor(file_names[0])

        file_data = numpy.asarray(RmFitsUtil.read_all_files_data(file_names))
        cls.check_cancellation(session_controller)

        file_data = calibrator.calibrate_images(file_data, sample_file,
                                                console, session_controller)
        cls.check_cancellation(session_controller)

        console.message("Calculating unclipped means", +1)
        column_means = numpy.mean(file_data, axis=0)
        cls.check_cancellation(session_controller)

        console.message("Calculating standard deviations", 0)
        column_stdevs = numpy.std(file_data, axis=0)
        cls.check_cancellation(session_controller)
        console.message("Calculating z-scores", 0)
        # Now what we'd like to do is just:
        #    z_scores = abs(file_data - column_means) / column_stdevs
        # Unfortunately, standard deviations can be zero, so that simplistic
        # statement would generate division-by-zero errors.
        # Std for a column would be zero if all the values in the column were identical.
        # In that case we wouldn't want to eliminate any anyway, so we'll set the
        # zero stdevs to a large number, which causes the z-scores to be small, which
        # causes no values to be eliminated.
        column_stdevs[column_stdevs == 0.0] = sys.float_info.max
        z_scores = abs(file_data - column_means) / column_stdevs
        cls.check_cancellation(session_controller)

        console.message("Eliminated data outside threshold", 0)
        exceeds_threshold = z_scores > sigma_threshold
        cls.check_cancellation(session_controller)

        # Calculate and display how much data we are ignoring
        dimensions = exceeds_threshold.shape
        total_pixels = dimensions[0] * dimensions[1] * dimensions[2]
        number_masked = numpy.count_nonzero(exceeds_threshold)
        percentage_masked = 100.0 * number_masked / total_pixels
        console.message(
            f"Discarded {number_masked:,} pixels of {total_pixels:,} "
            f"({percentage_masked:.3f}% of data)", +1)

        masked_array = ma.masked_array(file_data, exceeds_threshold)
        cls.check_cancellation(session_controller)
        console.message("Calculating adjusted means", -1)
        masked_means = ma.mean(masked_array, axis=0)
        cls.check_cancellation(session_controller)

        # If the means matrix contains any masked values, that means that in that column the clipping
        # eliminated *all* the data.  We will find the offending columns and re-calculate those using
        # simple min-max clipping.
        if ma.is_masked(masked_means):
            console.message(
                "Some columns lost all their values; min-max clipping those columns.",
                0)
            #  Get the mask, and get a 2D matrix showing which columns were entirely masked
            eliminated_columns_map = ndarray.all(exceeds_threshold, axis=0)
            masked_coordinates = numpy.where(eliminated_columns_map)
            x_coordinates = masked_coordinates[0]
            y_coordinates = masked_coordinates[1]
            assert len(x_coordinates) == len(y_coordinates)
            for index in range(len(x_coordinates)):
                cls.check_cancellation(session_controller)
                column_x = x_coordinates[index]
                column_y = y_coordinates[index]
                column = file_data[:, column_x, column_y]
                min_max_clipped_mean: int = round(
                    cls.calc_mm_clipped_mean(column, 2, console,
                                             session_controller))
                masked_means[column_x, column_y] = min_max_clipped_mean
            # We've replaced the problematic columns, now the mean should calculate cleanly
            assert not ma.is_masked(masked_means)
        cls.check_cancellation(session_controller)
        console.pop_level()
        result = masked_means.round().filled()
        return result
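Stripped of the console, calibration and cancellation plumbing, the core of the sigma-clipped combine is a few lines of numpy. A minimal sketch with made-up data (frame count, image size and threshold are illustrative only):

import numpy as np
import numpy.ma as ma

rng = np.random.default_rng(2)
stack = rng.normal(100.0, 5.0, size=(10, 4, 4))   # 10 frames of a 4x4 image
stack[3, 0, 0] = 10000.0                          # one hot pixel

means = stack.mean(axis=0)
stdevs = stack.std(axis=0)
stdevs[stdevs == 0.0] = np.finfo(float).max       # avoid divide-by-zero, as above
z_scores = np.abs(stack - means) / stdevs

# With n=10 frames a single outlier's z-score is bounded by sqrt(n-1) = 3
# (Samuelson's inequality), so use a threshold below that bound.
clipped = ma.masked_array(stack, z_scores > 2.5)
clipped_means = ma.mean(clipped, axis=0)

# If a whole column was rejected, fall back to the unclipped mean for that pixel.
if ma.is_masked(clipped_means):
    clipped_means = np.where(ma.getmaskarray(clipped_means), means, clipped_means.filled(0.0))

print(means[0, 0], ma.filled(clipped_means, np.nan)[0, 0])   # ~1090 unclipped vs ~100 after clipping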
Example n. 55
0
def ingest_spectra(db,
                   sources,
                   spectra,
                   regimes,
                   telescopes,
                   instruments,
                   modes,
                   obs_dates,
                   references,
                   wavelength_units=None,
                   flux_units=None,
                   wavelength_order=None,
                   comments=None,
                   other_references=None,
                   raise_error=True):
    """

    Parameters
    ----------
    db: astrodbkit2.astrodb.Database
    sources: list[str]
        List of source names
    spectra: list[str]
        List of filenames corresponding to spectra files
    regimes: str or list[str]
        List or string
    telescopes: str or list[str]
        List or string
    instruments: str or list[str]
        List or string
    modes: str or list[str]
        List or string
    obs_dates: str or datetime
        List of strings or datetime objects
    references: list[str]
        List or string
    wavelength_units: list[str] or Quantity, optional
        List or string
    flux_units: list[str] or Quantity, optional
        List or string
    wavelength_order: list[int], optional
    comments: list[str], optional
        List of strings
    other_references: list[str], optional
        List of strings
    raise_error: bool

    """

    # Convert single value input values to lists
    input_values = [
        regimes, telescopes, instruments, modes, wavelength_order,
        wavelength_units, flux_units, references
    ]
    for i, input_value in enumerate(input_values):
        if isinstance(input_value, str):
            input_values[i] = [input_value] * len(sources)
        elif isinstance(input_value, type(None)):
            input_values[i] = [None] * len(sources)
    regimes, telescopes, instruments, modes, wavelength_order, wavelength_units, flux_units, references = input_values

    n_spectra = len(spectra)
    n_skipped = 0
    n_dupes = 0
    n_missing_instrument = 0
    n_added = 0
    n_blank = 0

    msg = f'Trying to add {n_spectra} spectra'
    logger.info(msg)

    for i, source in enumerate(sources):
        # TODO: check that spectrum can be read by astrodbkit

        # Get source name as it appears in the database
        db_name = find_source_in_db(db, source)

        if len(db_name) != 1:
            msg = f"No unique source match for {source} in the database"
            raise SimpleError(msg)
        else:
            db_name = db_name[0]

        # Check if spectrum file is accessible
        # First check for internet
        internet = check_internet_connection()
        if internet:
            request_response = requests.head(spectra[i])
            status_code = request_response.status_code  # The website is up if the status code is 200
            if status_code != 200:
                n_skipped += 1
                msg = "The spectrum location does not appear to be valid: \n" \
                        f'spectrum: {spectra[i]} \n' \
                        f'status code: {status_code}'
                logger.error(msg)
                if raise_error:
                    raise SimpleError(msg)
                else:
                    continue
            else:
                msg = f"The spectrum location appears up: {spectra[i]}"
                logger.debug(msg)
        else:
            msg = "No internet connection. Internet is needed to check spectrum files."
            raise SimpleError(msg)

        # Find what spectra already exists in database for this source
        source_spec_data = db.query(
            db.Spectra).filter(db.Spectra.c.source == db_name).table()

        # SKIP if observation date is blank
        # TODO: try to populate obs date from meta data in spectrum file
        if ma.is_masked(obs_dates[i]) or obs_dates[i] == '':
            obs_date = None
            missing_obs_msg = f"Skipping spectrum with missing observation date: {source} \n"
            missing_row_spe = f"{source, obs_dates[i], references[i]} \n"
            logger.info(missing_obs_msg)
            logger.debug(missing_row_spe)
            n_blank += 1
            continue
        else:
            try:
                obs_date = pd.to_datetime(
                    obs_dates[i]
                )  # TODO: Another method that doesn't require pandas?
            except dateutil.parser._parser.ParserError:
                n_skipped += 1
                if raise_error:
                    msg = f"{source}: Can't convert obs date to Date Time object: {obs_dates[i]}"
                    logger.error(msg)
                    raise SimpleError(msg)
                else:
                    msg = f"Skipping {source} Can't convert obs date to Date Time object: {obs_dates[i]}"
                    logger.warning(msg)
                continue

        # TODO: make it possible to ingest units and order
        row_data = [{
            'source':
            db_name,
            'spectrum':
            spectra[i],
            'local_spectrum':
            None,  # if ma.is_masked(local_spectra[i]) else local_spectra[i],
            'regime':
            regimes[i],
            'telescope':
            telescopes[i],
            'instrument':
            None if ma.is_masked(instruments[i]) else instruments[i],
            'mode':
            None if ma.is_masked(modes[i]) else modes[i],
            'observation_date':
            obs_date,
            'wavelength_units':
            None if ma.is_masked(wavelength_units[i]) else wavelength_units[i],
            'flux_units':
            None if ma.is_masked(flux_units[i]) else flux_units[i],
            'wavelength_order':
            None if ma.is_masked(wavelength_order[i]) else wavelength_order[i],
            'comments':
            None if ma.is_masked(comments[i]) else comments[i],
            'reference':
            references[i],
            'other_references':
            None if ma.is_masked(other_references[i]) else other_references[i]
        }]
        logger.debug(row_data)

        try:
            db.Spectra.insert().execute(row_data)
            n_added += 1
        except sqlalchemy.exc.IntegrityError as e:
            # TODO: add elif to check if reference is in Publications Table

            if "CHECK constraint failed: regime" in str(e):
                msg = f"Regime provided is not in schema: {regimes[i]}"
                logger.error(msg)
                if raise_error:
                    raise SimpleError(msg)
                else:
                    continue

            # check telescope, instrument, mode exists
            telescope = db.query(db.Telescopes).filter(
                db.Telescopes.c.name == row_data[0]['telescope']).table()
            instrument = db.query(db.Instruments).filter(
                db.Instruments.c.name == row_data[0]['instrument']).table()
            mode = db.query(db.Modes).filter(
                db.Modes.c.name == row_data[0]['mode']).table()

            if len(source_spec_data) > 0:  # Spectra data already exists
                # check for duplicate measurement
                ref_dupe_ind = source_spec_data['reference'] == references[i]
                date_dupe_ind = source_spec_data[
                    'observation_date'] == obs_date
                instrument_dupe_ind = source_spec_data[
                    'instrument'] == instruments[i]
                mode_dupe_ind = source_spec_data['mode'] == modes[i]
                if sum(ref_dupe_ind) and sum(date_dupe_ind) and sum(
                        instrument_dupe_ind) and sum(mode_dupe_ind):
                    msg = f"Skipping suspected duplicate measurement\n{source}\n"
                    msg2 = f"{source_spec_data[ref_dupe_ind]['source', 'instrument', 'mode', 'observation_date', 'reference']}"
                    msg3 = f"{instruments[i], modes[i], obs_date, references[i], spectra[i]} \n"
                    logger.warning(msg)
                    logger.debug(msg2 + msg3 + str(e))
                    n_dupes += 1
                    if raise_error:
                        raise SimpleError(msg)
                    else:
                        continue  # Skip duplicate measurement
                # else:
                #     msg = f'Spectrum could not be added to the database (other data exist): \n ' \
                #           f"{source, instruments[i], modes[i], obs_date, references[i], spectra[i]} \n"
                #     msg2 = f"Existing Data: \n "
                #            # f"{source_spec_data[ref_dupe_ind]['source', 'instrument', 'mode', 'observation_date', 'reference', 'spectrum']}"
                #     msg3 = f"Data not able to add: \n {row_data} \n "
                #     logger.warning(msg + msg2)
                #     source_spec_data[ref_dupe_ind][
                #               'source', 'instrument', 'mode', 'observation_date', 'reference', 'spectrum'].pprint_all()
                #     logger.debug(msg3)
                #     n_skipped += 1
                #     continue
            if len(instrument) == 0 or len(mode) == 0 or len(telescope) == 0:
                msg = f'Spectrum for {source} could not be added to the database. \n' \
                      f' Telescope, Instrument, and/or Mode need to be added to the appropriate table. \n' \
                      f" Trying to find telescope: {row_data[0]['telescope']}, instrument: {row_data[0]['instrument']}, " \
                      f" mode: {row_data[0]['mode']} \n" \
                      f" Telescope: {telescope}, Instrument: {instrument}, Mode: {mode} \n"
                logger.error(msg)
                n_missing_instrument += 1
                if raise_error:
                    raise SimpleError(msg)
                else:
                    continue
            else:
                msg = f'Spectrum for {source} could not be added to the database for unknown reason: \n {row_data} \n '
                logger.error(msg)
                raise SimpleError(msg)

    msg = f"SPECTRA ADDED: {n_added} \n" \
          f" Spectra with blank obs_date: {n_blank} \n" \
          f" Suspected duplicates skipped: {n_dupes}\n" \
          f" Missing Telescope/Instrument/Mode: {n_missing_instrument} \n" \
          f" Spectra skipped for unknown reason: {n_skipped} \n"
    logger.info(msg)

    if n_added + n_dupes + n_blank + n_skipped + n_missing_instrument != n_spectra:
        msg = "Numbers don't add up: "
        logger.error(msg)
        raise SimpleError(msg)

    spec_count = db.query(Spectra.regime, func.count(Spectra.regime)).group_by(
        Spectra.regime).all()

    spec_ref_count = db.query(Spectra.reference, func.count(Spectra.reference)). \
        group_by(Spectra.reference).order_by(func.count(Spectra.reference).desc()).limit(20).all()

    telescope_spec_count = db.query(Spectra.telescope, func.count(Spectra.telescope)). \
        group_by(Spectra.telescope).order_by(func.count(Spectra.telescope).desc()).limit(20).all()

    logger.info(
        f'Spectra in the database: \n {spec_count} \n {spec_ref_count} \n {telescope_spec_count}'
    )

    return
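A recurring pattern in the row_data construction above is mapping masked table cells to None so that they become NULLs in the database. In isolation (the array contents here are invented):

import numpy.ma as ma

instruments = ma.array(['SpeX', 'N/A', 'NIRES'], mask=[False, True, False])

def or_none(value):
    # Masked cells become None, everything else passes through unchanged.
    return None if ma.is_masked(value) else value

for value in instruments:
    print(or_none(value))    # SpeX, then None, then NIRES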
Example n. 56
0
def align_seismograms(stations, arrival_times, streams, downsampled_streams,
                      reference_station_stream, downsampled_rss, phase,
                      parameters, values):
    """
    Align all seismograms for each event to facilitate cross-correlation:
    Use the lag time that produces the maximum cross-correlation value between each sensor and the reference
    sensor's energy traces. In this process, convert all numpy masked arrays to numpy arrays with nan
    values as mask fill values.
    """

    nandices = [0, None]
    shifted_streams = streams
    shifted_downsampled_streams = downsampled_streams
    for m in range(len(streams)):
        # Cut the lag time streams to 5 seconds before and after the arrival at each site
        station_stream = downsampled_streams[m].copy()
        try:
            station_stream.trim(
                starttime=UTCDateTime(
                    arrival_times[stations.index(stations[m])] -
                    datetime.timedelta(seconds=5)),
                endtime=UTCDateTime(
                    arrival_times[stations.index(stations[m])] +
                    datetime.timedelta(seconds=5)))
        except ValueError:
            # When there is no data for the station, use the minimum and maximum from the other sites
            station_stream.trim(starttime=UTCDateTime(
                min(arrival_times) - datetime.timedelta(seconds=5)),
                                endtime=UTCDateTime(
                                    max(arrival_times) +
                                    datetime.timedelta(seconds=5)))
        reference_stream = downsampled_rss.copy()
        try:
            reference_stream.trim(
                starttime=UTCDateTime(arrival_times[stations.index(values[
                    parameters.index('reference_station')])] -
                                      datetime.timedelta(seconds=5)),
                endtime=UTCDateTime(arrival_times[stations.index(values[
                    parameters.index('reference_station')])] +
                                    datetime.timedelta(seconds=5)))
        except ValueError:
            # When there is no data for the station, use the minimum and maximum from the other sites
            reference_stream.trim(starttime=UTCDateTime(
                min(arrival_times) - datetime.timedelta(seconds=5)),
                                  endtime=UTCDateTime(
                                      max(arrival_times) +
                                      datetime.timedelta(seconds=5)))
        lag_time = find_lag_time(station_stream, reference_stream, phase)
        shift_idx = int(abs(lag_time * streams[m][0].stats.sampling_rate))
        downsampled_shift_idx = int(
            abs(lag_time * downsampled_streams[m][0].stats.sampling_rate))
        for n in range(len(streams[m])):

            # Ensure all data are masked arrays
            if not ma.is_masked(streams[m][n].data):
                streams[m][n].data = ma.masked_array(streams[m][n].data)
            if not ma.is_masked(downsampled_streams[m][n].data):
                downsampled_streams[m][n].data = ma.masked_array(
                    downsampled_streams[m][n].data)

            # Apply shift
            if lag_time > 0:
                nandices[0] = downsampled_shift_idx
                shifted_streams[m][n].data = np.asarray(
                    [float('nan')] * shift_idx + streams[m]
                    [n].data[:-shift_idx].filled(float('nan')).tolist())
                shifted_downsampled_streams[m][n].data = np.asarray(
                    [float('nan')] * downsampled_shift_idx +
                    downsampled_streams[m][n].data[:-downsampled_shift_idx].
                    filled(float('nan')).tolist())
            else:
                nandices[1] = len(
                    downsampled_streams[m][n].data) - downsampled_shift_idx
                shifted_streams[m][n].data = np.asarray(
                    streams[m][n].data[shift_idx:].filled(float(
                        'nan')).tolist() + [float('nan')] * shift_idx)
                shifted_downsampled_streams[m][n].data = np.asarray(
                    downsampled_streams[m][n].data[downsampled_shift_idx:].
                    filled(float('nan')).tolist() +
                    [float('nan')] * downsampled_shift_idx)

        print(shifted_streams[m][0].stats.station +
              ' seismograms have been aligned to the reference station by '
              'applying a shift of ' + str(lag_time) + ' seconds')

    for m in range(len(reference_station_stream)):
        if ma.is_masked(reference_station_stream[m].data):
            reference_station_stream[m].data = reference_station_stream[
                m].data.filled(float('nan'))
            downsampled_rss[m].data = downsampled_rss[m].data.filled(
                float('nan'))

    return nandices, reference_station_stream, downsampled_rss, shifted_streams, shifted_downsampled_streams
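The shift applied inside the loop, reduced to a single masked trace (the trace values and shift are invented for illustration):

import numpy as np
import numpy.ma as ma

trace = ma.masked_invalid(np.array([np.nan, 1.0, 2.0, 3.0, 4.0]))
shift_idx = 2   # int(abs(lag_time * sampling_rate)) in the function above

# Positive lag: pad the front with NaN, drop the tail, and fill masked samples with NaN.
shifted = np.asarray([float('nan')] * shift_idx +
                     trace[:-shift_idx].filled(float('nan')).tolist())
print(shifted)   # [nan nan nan  1.  2.]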
Example n. 57
0
def write_numpy_to_gdal(data,
                        geotransform,
                        wkt_proj,
                        dst_filename,
                        gdal_format='GTiff',
                        origin_up=True,
                        options=None,
                        nodata_value=None):
    """Given numpy data and projection information, write to a gdal file.

    Parameters
    ----------

    data :
        a 2D numpy array
    geotransform :
        a list containing the affine transformation
        (e.g., the result of gdal data_set.GetGeoTransform())
    wkt_proj :
        well known text projection information
        (e.g., the data_set.GetProjection() )
    dst_filename : str
        destination file name
    origin_up : bool
        if origin_up == True, the data is reversed in its first axis
    options :
        options to pass to gdal.
    nodata_value :
        nodata_value value. If None, no nodata_value value is set.
    """

    gdal_type = get_gdal_type(data.dtype)

    # Get the driver and open the output file

    driver = gdal.GetDriverByName(gdal_format)
    if driver is None:
        raise Exception('Unimplemented gdal driver: %s' % gdal_format)

    dst_ds = driver.Create(dst_filename,
                           data.shape[1],
                           data.shape[0],
                           bands=1,
                           eType=gdal_type)  #, options=options )

    # Set all of the transform information

    if origin_up:
        data = data[::-1, :]

    dst_ds.SetGeoTransform(geotransform)
    dst_ds.SetProjection(wkt_proj)

    # Now write the raster

    band = dst_ds.GetRasterBand(1)

    if nodata_value is not None:
        band.SetNoDataValue(nodata_value)

    if is_masked(data):
        if nodata_value is not None:
            data.data[data.mask] = nodata_value
        band.WriteArray(data.data)
    else:
        band.WriteArray(data)

    # Clean up by closing the dataset

    dst_ds = None
    src_ds = None
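The is_masked branch above burns the nodata value into the masked cells before writing the raw .data array. The same step in isolation, without gdal (values are illustrative):

import numpy as np
import numpy.ma as ma

raster = ma.masked_invalid(np.array([[1.0, np.nan], [3.0, 4.0]]))
nodata_value = -9999.0

if ma.is_masked(raster):
    raster.data[ma.getmaskarray(raster)] = nodata_value

print(raster.data[0, 1])   # -9999.0

ma.filled(raster, nodata_value) would give the same result without modifying the array in place.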
Example n. 58
0
    ans = dat[:][ii]
    print(ii + 1, ans[2])
    #print(ans[2], ans[9], ans[10])

    # import pdb ; pdb.set_trace()

    outfile.write(r'{\large {\bf ' + ans[2] + r'}}\\' + '\n')
    #   outfile.write(r'\vspace*{1mm}'+'\n')

    outfile.write(r'Email: {}\\'.format(ans[5]) + '\n')
    outfile.write(r'Cell: {}\\'.format(ans[6]) + '\n')
    outfile.write(r'Class: {}\\'.format(ans[3]) + '\n')
    outfile.write(r'Anticipated Graduation Date: {}\\'.format(ans[4]) + '\n')

    outfile.write(r'Major(s): {}\\'.format(ans[7]) + '\n')
    if ma.is_masked(ans[8]):
        outfile.write(r'Minor(s): None\\' + '\n')
    else:
        outfile.write(r'Minor(s): {}\\'.format(ans[8]) + '\n')
    outfile.write(r'Overall GPA: {:.5}\\'.format(str(ans[9])) + '\n')
    outfile.write(r'Major GPA: {:.5}\\'.format(str(ans[10])) + '\n')
    outfile.write('\n ')
    outfile.write(r'\vspace*{1mm}' + '\n')
    for jj in range(11, 20):
        #print(ii, jj, ans[jj])
        outfile.write(r'{\bf ' + head[jj] + r'}\\' + '\n')
        if ma.is_masked(ans[jj]):
            outfile.write(r'\vspace*{3mm}' + '\n')
        else:
            outfile.write(ans[jj].replace('&', ',') + '\n')
        #   for jj in range(len(indx)):
Example n. 59
0
def plot_data_ax(fig,
                 ax,
                 matrix,
                 times,
                 wavelengths,
                 symlog=True,
                 t_unit='ps',
                 z_unit=dA_unit,
                 cmap='diverging',
                 z_lim=(None, None),
                 t_lim=(None, None),
                 w_lim=(None, None),
                 linthresh=1,
                 linscale=1,
                 D_mul_factor=1e3,
                 n_lin_bins=10,
                 n_log_bins=10,
                 plot_tilts=True,
                 y_major_formatter=ScalarFormatter(),
                 x_minor_locator=AutoMinorLocator(10),
                 x_major_locator=None,
                 n_levels=30,
                 plot_countours=True,
                 colorbar_locator=MultipleLocator(50),
                 colorbarpad=0.04,
                 diverging_white_cmap_tr=0.98,
                 hatch='/////',
                 colorbar_aspect=35,
                 add_wn_axis=True,
                 x_label="Wavelength / nm"):
    """data is individual dataset"""

    # assert type(data) == Data

    t_lim = (times[0] if t_lim[0] is None else t_lim[0],
             times[-1] if t_lim[1] is None else t_lim[1])
    w_lim = (wavelengths[0] if w_lim[0] is None else w_lim[0],
             wavelengths[-1] if w_lim[1] is None else w_lim[1])

    D = matrix.copy() * D_mul_factor

    zmin = np.min(D) if z_lim[0] is None else z_lim[0]
    zmax = np.max(D) if z_lim[1] is None else z_lim[1]

    if z_lim[0] is not None:
        D[D < zmin] = zmin

    if z_lim[1] is not None:
        D[D > zmax] = zmax

    register_div_cmap(zmin, zmax)
    register_div_white_cmap(zmin, zmax, diverging_white_cmap_tr)

    x, y = np.meshgrid(
        wavelengths,
        times)  # needed for pcolormesh to correctly scale the image

    # plot data matrix D

    set_main_axis(ax,
                  xlim=w_lim,
                  ylim=t_lim,
                  x_label=x_label,
                  y_label=f'Time delay / {t_unit}',
                  x_minor_locator=x_minor_locator,
                  x_major_locator=x_major_locator,
                  y_minor_locator=None)
    if add_wn_axis:
        w_ax = setup_wavenumber_axis(ax, x_major_locator=MultipleLocator(0.5))
        w_ax.tick_params(which='minor', direction='out')
        w_ax.tick_params(which='major', direction='out')

    #     ax.set_facecolor((0.8, 0.8, 0.8, 1))
    # Hatch the masked wavelength range; see
    # https://stackoverflow.com/questions/41664850/hatch-area-using-pcolormesh-in-basemap
    if ma.is_masked(D):
        m_idxs = np.argwhere(D.mask[0] > 0).squeeze()
        wl_range = [wavelengths[m_idxs[0] - 1], wavelengths[m_idxs[-1] + 1]]
        ax.fill_between(wl_range, [t_lim[0], t_lim[0]], [t_lim[1], t_lim[1]],
                        facecolor="none",
                        hatch=hatch,
                        edgecolor="k",
                        linewidth=0.0)

    #     mappable = ax.pcolormesh(x, y, D, cmap=cmap, vmin=zmin, vmax=zmax)
    levels = get_sym_space(zmin, zmax, n_levels)
    mappable = ax.contourf(x,
                           y,
                           D,
                           cmap=cmap,
                           vmin=zmin,
                           vmax=zmax,
                           levels=levels,
                           antialiased=True)

    if plot_countours:
        cmap_colors = cm.get_cmap(cmap)
        colors = cmap_colors(np.linspace(0, 1, n_levels + 1))
        colors *= 0.45  # draw contours in darkened colormap colors: blue -> dark blue, white -> gray, ...
        ax.contour(x,
                   y,
                   D,
                   colors=colors,
                   levels=levels,
                   antialiased=True,
                   linewidths=0.1,
                   alpha=1,
                   linestyles='-')

    ax.invert_yaxis()

    ax.tick_params(which='major', direction='out')
    ax.tick_params(which='minor', direction='out')
    ax.yaxis.set_ticks_position('both')

    ax.set_axisbelow(False)

    fig.colorbar(mappable,
                 ax=ax,
                 label=z_unit,
                 orientation='vertical',
                 aspect=colorbar_aspect,
                 pad=colorbarpad,
                 ticks=colorbar_locator)

    if symlog:
        ax.set_yscale('symlog',
                      subs=[2, 3, 4, 5, 6, 7, 8, 9],
                      linscale=linscale,
                      linthresh=linthresh)
        ax.yaxis.set_major_locator(
            MajorSymLogLocator(base=10, linthresh=linthresh))
        ax.yaxis.set_minor_locator(
            MinorSymLogLocator(linthresh,
                               n_lin_ints=n_lin_bins,
                               n_log_ints=n_log_bins,
                               base=10))

        if plot_tilts:
            norm = c.SymLogNorm(vmin=t_lim[0],
                                vmax=t_lim[1],
                                linscale=linscale,
                                linthresh=linthresh,
                                base=10,
                                clip=True)
            _plot_tilts(ax, norm, linthresh, 'y', inverted_axis=True)

    if y_major_formatter:
        ax.yaxis.set_major_formatter(y_major_formatter)
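The hatched-mask trick referenced above via the stackoverflow link also works on its own. A condensed sketch with synthetic data, assuming matplotlib; the axis-setup helpers from the snippet are omitted:

import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt

wavelengths = np.linspace(400, 700, 61)
times = np.linspace(0, 10, 50)
D = np.sin(wavelengths / 50)[None, :] * np.exp(-times / 5)[:, None]
D = ma.masked_array(D, mask=np.zeros_like(D, dtype=bool))
D.mask[:, 25:30] = True                           # a masked wavelength band

fig, ax = plt.subplots()
ax.pcolormesh(wavelengths, times, ma.filled(D, np.nan), shading='auto')

# Hatch the masked wavelength range instead of leaving it blank.
if ma.is_masked(D):
    m_idxs = np.argwhere(D.mask[0]).squeeze()
    ax.fill_between([wavelengths[m_idxs[0]], wavelengths[m_idxs[-1]]],
                    times[0], times[-1],
                    facecolor='none', hatch='/////', edgecolor='k', linewidth=0.0)
plt.show()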
Example n. 60
0
def on_campus_arr(filename):
    schedule = []
    with open(filename) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        for line in csv_reader:
            schedule.append(line[1])
            schedule.append(line[-1])

        schedule = np.asarray(schedule)
        schedule = schedule.reshape((-1, 2))
        masked = ma.masked_where(schedule == 'to be arranged', schedule)
        masked = ma.masked_where(masked == '0', masked)

        unmasked = []
        index = 0
        for item in masked:
            if not ma.is_masked(item):
                unmasked.append(schedule[index])
            index += 1

        unmasked = np.asarray(unmasked)

        final_schedule = np.empty((unmasked.shape[0], 4), dtype='U14')

        for i in range(len(unmasked)):

            final_schedule[i, 3] = unmasked[i, 1]

            day_time = unmasked[i, 0].split(' ')
            unmasked[i, 0] = day_time[0]
            unmasked[i, 1] = day_time[1]
            final_schedule[i, 0] = unmasked[i, 0]

            hour = unmasked[i, 1].split('-')
            unmasked[i, 0] = hour[0]
            unmasked[i, 1] = hour[1]

            final_schedule[i, 1] = unmasked[i, 0]
            final_schedule[i, 2] = unmasked[i, 1]

            if final_schedule[i, 2][-1] == 'P':
                final_schedule[i, 1] = int(final_schedule[i, 1]) + 1200
                final_schedule[i, 2] = int(final_schedule[i, 2][0:-1]) + 1200

            if int(final_schedule[i, 1]) > int(final_schedule[i, 2]):
                final_schedule[i, 2] = int(final_schedule[i, 2]) + 1200

            if int(final_schedule[i, 2]) < 800:
                final_schedule[i, 1] = int(final_schedule[i, 1]) + 1200
                final_schedule[i, 2] = int(final_schedule[i, 2]) + 1200

            if int(final_schedule[i, 1]) < 800 and int(final_schedule[i, 2]):
                final_schedule[i, 1] = int(final_schedule[i, 1]) + 1200
                final_schedule[i, 2] = int(final_schedule[i, 2]) + 1200

            if (final_schedule[i, 2][-2:] == '05'
                    or final_schedule[i, 2][-2:] == '10'):
                if r.randint(2) == 0:
                    final_schedule[i, 2] = final_schedule[i, 2][0:-2] + '00'
                else:
                    final_schedule[i, 2] = final_schedule[i, 2][0:-2] + '15'

            if (final_schedule[i, 2][-2:] == '20'
                    or final_schedule[i, 2][-2:] == '25'):
                if r.randint(2) == 0:
                    final_schedule[i, 2] = final_schedule[i, 2][0:-2] + '15'
                else:
                    final_schedule[i, 2] = final_schedule[i, 2][0:-2] + '30'

            if (final_schedule[i, 2][-2:] == '35'
                    or final_schedule[i, 2][-2:] == '40'):
                if r.randint(2) == 0:
                    final_schedule[i, 2] = final_schedule[i, 2][0:-2] + '30'
                else:
                    final_schedule[i, 2] = final_schedule[i, 2][0:-2] + '45'

            if (final_schedule[i, 2][-2:] == '50'
                    or final_schedule[i, 2][-2:] == '55'):
                if r.randint(2) == 0:
                    final_schedule[i, 2] = final_schedule[i, 2][0:-2] + '45'
                else:
                    final_schedule[i, 2] = final_schedule[i, 2][0:-2] + '00'
                    temp_str = int(final_schedule[i, 2]) + 100
                    final_schedule[i, 2] = str(int(temp_str))

            if (final_schedule[i, 1][-2:] != '00'):
                convert_to_quarter = int(final_schedule[i, 1][-2:]) * 100 / 60
                convert_to_quarter = str(int(convert_to_quarter))

                time_str = final_schedule[i, 1][0:-2]
                time_str += convert_to_quarter
                final_schedule[i, 1] = time_str

            if (final_schedule[i, 2][-2:] != '00'):
                convert_to_quarter = int(final_schedule[i, 2][-2:]) * 100 / 60
                convert_to_quarter = str(int(convert_to_quarter))

                time_str = final_schedule[i, 2][0:-2]
                time_str += convert_to_quarter
                final_schedule[i, 2] = time_str

    #- Times are encoded as hour*100 plus minutes scaled to 0-100,
    #- so a full day runs from 0 to 2400 code units.
    #- Each 15-minute slot is 25 code units, giving 2400 / 25 = 96 steps.

    #- 800 = 8:00, 825 = 8:15, 850 = 8:30, 875 = 8:45
    #- 1600 = 4:00 pm, 1625 = 4:15, 1650 = 4:30, 1675 = 4:45
    STEPS = 25
    BEGINNING = 0
    # Can change the BEGINNING value to remove all unnecessary
    # zeros in the 2D array

    on_campus = np.zeros((96, 5))

    def save2arr(day, in_time, out_time, destination, value):
        start = int(in_time)
        end = int(out_time)
        time1 = int((start - BEGINNING) / STEPS)
        time2 = int((end - BEGINNING) / STEPS)

        #    print(start,end,time1,time2)
        time = range(time1, time2)
        #    print(time1,time2)
        for i in time:
            #        print(i)
            destination[i][day] += int(value)

    #M = [0], T = [1], W = [2], TH = [3], F = [4]

    for row in final_schedule:
        #    print(row[0])

        for day in range(len(row[0])):

            if row[0][day] == 'M':
                save2arr(0, row[1], row[2], on_campus, row[-1])
    #            print('M')

            elif row[0][day] == 'T':
                if day < len(row[0]) - 1 and row[0][day + 1] == 'h':
                    #                    if row[0][day+1] == 'h':
                    save2arr(3, row[1], row[2], on_campus, row[-1])
                else:
                    save2arr(1, row[1], row[2], on_campus, row[-1])

            elif row[0][day] == 'W':
                save2arr(2, row[1], row[2], on_campus, row[-1])

            elif row[0][day] == 'F':
                save2arr(4, row[1], row[2], on_campus, row[-1])

            else:
                continue

    on_campus = on_campus.astype(int)
    '''    
        with open("2DonCampus.csv","w+") as my_csv:
            csvWriter = csv.writer(my_csv,delimiter=',')
            csvWriter.writerows(on_campus)

        with open("fileReadableSchedule.csv","w+") as my_csv:
            csvWriter = csv.writer(my_csv,delimiter=',')
            csvWriter.writerows(final_schedule)
    '''
    return on_campus
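To make the 25-units-per-slot arithmetic concrete, here is a small worked check of how save2arr maps an encoded time range onto row indices of the 96-slot array (the example times are arbitrary):

STEPS = 25
BEGINNING = 0

start, end = 875, 1000                     # 8:45 to 10:00 in the hour*100 + minutes*100/60 encoding
first_slot = int((start - BEGINNING) / STEPS)
last_slot = int((end - BEGINNING) / STEPS)
print(list(range(first_slot, last_slot)))  # [35, 36, 37, 38, 39] -> five 15-minute slots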