def copy_hook(pathname, dest, val=val, pub=pub):
    """Publish a message for the file copied to *dest*."""
    fname = os.path.basename(pathname)
    destination = urlunparse((dest.scheme,
                              dest.hostname,
                              os.path.join(dest.path, fname),
                              dest.params,
                              dest.query,
                              dest.fragment))
    # Parse any user-supplied info pairs ("key=value;key2=a,b")
    info = val.get("info", "")
    if info:
        info = dict((elt.strip().split('=') for elt in info.split(";")))
        for infokey, infoval in info.items():
            if "," in infoval:
                info[infokey] = infoval.split(",")
    else:
        info = {}
    # Extract metadata from the filename; fall back to the origin pattern
    # without its extension if the full pattern doesn't match
    try:
        info.update(parse(os.path.basename(val["origin"]),
                          os.path.basename(pathname)))
    except ValueError:
        info.update(parse(os.path.basename(os.path.splitext(val["origin"])[0]),
                          os.path.basename(pathname)))
    info['uri'] = destination
    info['uid'] = fname
    msg = Message(val["topic"], 'file', info)
    pub.send(str(msg))
    LOGGER.debug("Message sent: %s", str(msg))

def fun(orig_pathname):
    """Publish what we have."""
    if not fnmatch.fnmatch(orig_pathname, pattern):
        return
    LOGGER.debug('We have a match: %s', orig_pathname)

    pathname = unpack(orig_pathname, **attrs)

    info = attrs.get("info", {})
    if info:
        info = dict((elt.strip().split('=') for elt in info.split(";")))
        for infokey, infoval in info.items():
            if "," in infoval:
                info[infokey] = infoval.split(",")

    info.update(parse(attrs["origin"], orig_pathname))
    info['uri'] = pathname
    info['uid'] = os.path.basename(pathname)
    info['request_address'] = attrs.get(
        "request_address", get_own_ip()) + ":" + attrs["request_port"]
    msg = Message(attrs["topic"], 'file', info)
    publisher.send(str(msg))
    with file_cache_lock:
        file_cache.appendleft(attrs["topic"] + '/' + info["uid"])
    LOGGER.debug("Message sent: %s", str(msg))

def read_gac(filename, reader_kwargs=None):
    """Read and calibrate AVHRR GAC level 1b data using satpy.

    Args:
        filename (str): AVHRR GAC level 1b file.
        reader_kwargs (dict): Keyword arguments to be passed to the reader.

    Returns:
        The loaded data in a satpy.Scene object.
    """
    scene = satpy.Scene(filenames=[filename],
                        reader="avhrr_l1b_gaclac",
                        reader_kwargs=reader_kwargs)
    scene.load(BANDS)
    scene.load(AUX_DATA)

    # Add additional metadata parsed from the filename
    basename = os.path.basename(filename)
    fname_info = trollsift.parse(GAC_FORMAT, basename)
    orbit_number_end = (fname_info["orbit_number"] // 100 * 100 +
                        fname_info["end_orbit_last_digits"])
    scene.attrs.update({
        "gac_filename": basename,
        "orbit_number_start": fname_info["orbit_number"],
        "orbit_number_end": orbit_number_end,
        "ground_station": fname_info["station"],
    })
    return scene

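# A minimal usage sketch for read_gac, assuming a GAC file following the NOAA
# naming convention used elsewhere in this code. The path and the tle_dir
# reader kwarg below are hypothetical; reader_kwargs are simply forwarded to
# the avhrr_l1b_gaclac reader:
#
#     scene = read_gac("NSS.GHRR.NG.D88002.S0614.E0807.B0670506.WI",
#                      reader_kwargs={"tle_dir": "/path/to/tle"})
#     print(scene.attrs["orbit_number_start"], scene.attrs["orbit_number_end"])
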
def _get_fh(self, filename='NSS.GHRR.NG.D88002.S0614.E0807.B0670506.WI',
            **kwargs):
    """Create a file handler."""
    from trollsift import parse
    filename_info = parse(GAC_PATTERN, filename)
    return self.GACLACFile(filename, filename_info, {}, **kwargs)

def _get_notify_message_info(attrs, orig_pathname, pathname):
    """Compose the info dict for a file message from a notify event."""
    info = _collect_attribute_info(attrs)
    info.update(parse(attrs["origin"], orig_pathname))
    info['uri'] = pathname
    info['uid'] = os.path.basename(pathname)
    info['request_address'] = attrs.get(
        "request_address", get_own_ip()) + ":" + attrs["request_port"]
    return info

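# Illustration of the trollsift.parse call above: attrs["origin"] is a
# trollsift pattern, and parse() extracts the fields it names. The pattern and
# filename here are made up for the example:
#
#     from trollsift import parse
#     parse("{platform}_{start_time:%Y%m%d%H%M}.nc", "noaa19_201702071200.nc")
#     # -> {'platform': 'noaa19',
#     #     'start_time': datetime.datetime(2017, 2, 7, 12, 0)}
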
def _check_existing(self, start_time):
    """Check if there's an existing image that should be updated."""
    # Check if something similar has already been made: look for matching
    # platform_name, areaname and start_time +- timeliness minutes
    check_start_time = start_time - dt.timedelta(minutes=self.timeliness)
    check_dict = self.fileparts.copy()
    try:
        check_dict["tag"] = self.tags[0]
    except IndexError:
        pass
    if self.is_backup:
        check_dict["platform_name"] = '*'
        check_dict["sat_loc"] = '*'
        # check_dict["composite"] = '*'

    first_overpass = True
    update_fname_parts = {}
    for i in range(2 * self.timeliness + 1):
        check_dict[self.time_name] = \
            check_start_time + dt.timedelta(minutes=i)
        glob_pattern = compose(
            os.path.join(self.out_dir, self.out_pattern), check_dict)
        logging.debug("Check pattern: %s", glob_pattern)
        glob_fnames = glob.glob(glob_pattern)
        if len(glob_fnames) > 0:
            fname = os.path.basename(glob_fnames[0])
            first_overpass = False
            logging.debug("Found files: %s", str(glob_fnames))
            try:
                update_fname_parts = parse(self.out_pattern, fname)
                update_fname_parts["composite"] = \
                    self.fileparts["composite"]
                if not self.is_backup:
                    try:
                        update_fname_parts["platform_name"] = \
                            self.fileparts["platform_name"]
                        return update_fname_parts
                    except KeyError:
                        pass
            except ValueError:
                logging.debug("Parsing failed for update_fname_parts.")
                logging.debug("out_pattern: %s, basename: %s",
                              self.out_pattern, fname)
                update_fname_parts = {}

    # Only backup, so save only if there were no matches
    if self.is_backup and not first_overpass:
        logging.info("File already exists, no backing up needed.")
        return None
    # No existing image
    return {}

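# Sketch of how the glob pattern above is built (pattern and values are
# hypothetical): compose() fills the trollsift pattern from check_dict, and
# '*' placeholders pass through unchanged, yielding a shell glob that matches
# any platform:
#
#     from trollsift import compose
#     check_dict = {"time": dt.datetime(2017, 2, 7, 12, 0),
#                   "platform_name": "*", "composite": "overview"}
#     compose("{time:%Y%m%d%H%M}_{platform_name}_{composite}.png", check_dict)
#     # -> "201702071200_*_overview.png"
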
def _file_equals_granule(file_path, granule, identity_check_seconds,
                         file_pattern):
    """Check if a file matches the given granule."""
    file_parts = trollsift.parse(file_pattern, os.path.basename(file_path))
    granule_path = granule["properties"]["location"]
    granule_parts = trollsift.parse(file_pattern,
                                    os.path.basename(granule_path))
    time_diff = file_parts.pop("start_time") - granule_parts.pop("start_time")
    if abs(time_diff.total_seconds()) > identity_check_seconds:
        return False
    all_identical = True
    for key in file_parts:
        if file_parts[key] != granule_parts[key]:
            all_identical = False
            break
    if all_identical:
        logger.info("Matching granule already exists. New: %s, old: %s",
                    file_path, granule_path)
        return True
    return False

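# Hypothetical call illustrating the check above: the two filenames differ
# only in start_time, and the 60 s difference is within the tolerance, so the
# files are treated as the same granule:
#
#     granule = {"properties": {"location": "/data/noaa19_201702071200.nc"}}
#     _file_equals_granule("/new/noaa19_201702071201.nc", granule,
#                          identity_check_seconds=120,
#                          file_pattern="{platform}_{start_time:%Y%m%d%H%M}.nc")
#     # -> True (and logs "Matching granule already exists. ...")
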
def copy_hook(pathname, dest, val=val, pub=pub):
    fname = os.path.basename(pathname)
    destination = urlunparse((dest.scheme,
                              dest.hostname,
                              os.path.join(dest.path, fname),
                              dest.params,
                              dest.query,
                              dest.fragment))
    info = val.get("info", "")
    if info:
        info = dict((elt.strip().split('=') for elt in info.split(";")))
        for infokey, infoval in info.items():
            if "," in infoval:
                info[infokey] = infoval.split(",")
    else:
        info = {}
    info.update(parse(val["origin"], pathname))
    info['uri'] = destination
    info['uid'] = fname
    msg = Message(val["topic"], 'file', info)
    pub.send(str(msg))
    LOGGER.debug("Message sent: %s", str(msg))

def test_gaclacfile(self):
    """Test the methods of the GACLACFile class."""
    from satpy.readers.avhrr_l1b_gaclac import GACLACFile
    from trollsift import parse
    from pygac.gac_klm import GACKLMReader
    from pygac.gac_pod import GACPODReader
    from satpy.dataset import DatasetID

    filename = np.random.choice(EXAMPLE_FILENAMES)
    filename_info = parse(GAC_PATTERN, filename)
    fh = GACLACFile(filename, filename_info, {})
    self.assertLess(fh.start_time, fh.end_time,
                    "Start time must precede end time.")
    if fh.sensor == 'avhrr-3':
        self.assertIs(fh.reader_class, GACKLMReader)
    else:
        self.assertIs(fh.reader_class, GACPODReader)

    # Channel data
    key = DatasetID('1')
    info = {'name': '1', 'standard_name': 'reflectance'}
    ch_ones = np.ones((10, 10))
    acq_ones = np.ones((10, ))
    GACPODReader.return_value.get_calibrated_channels.return_value.__getitem__.return_value = ch_ones
    GACPODReader.return_value.get_times.return_value = acq_ones
    res = fh.get_dataset(key, info)
    np.testing.assert_allclose(res.data, ch_ones)
    self.assertIs(res.coords['acq_time'].data, acq_ones)

    # Angles
    for item in ['solar_zenith_angle', 'sensor_zenith_angle',
                 # 'solar_azimuth_angle',
                 'sensor_azimuth_angle',
                 'sun_sensor_azimuth_difference_angle']:
        key = DatasetID(item)
        info = {'name': item}
        angle_ones = np.ones((10, 10))
        acq_ones = np.ones((10, ))
        GACPODReader.return_value.get_angles.return_value = (angle_ones, ) * 5
        GACPODReader.return_value.get_times.return_value = acq_ones
        GACPODReader.return_value.get_tle_lines.return_value = 'tle1', 'tle2'
        res = fh.get_dataset(key, info)
        np.testing.assert_allclose(res.data, angle_ones)
        self.assertIs(res.coords['acq_time'].data, acq_ones)
        self.assertDictEqual(res.attrs['orbital_parameters'],
                             {'tle': ('tle1', 'tle2')})

    # Longitude: lons/lats are filled lazily by get_lonlat()
    key = DatasetID('longitude')
    info = {'name': 'longitude', 'unit': 'degrees_east'}
    lon_ones = np.ones((10, 10))
    lat_ones = np.ones((10, 10))
    acq_ones = np.ones((10, ))
    GACPODReader.return_value.lons = None

    def fill_lonlat():
        GACPODReader.return_value.lons = lon_ones
        GACPODReader.return_value.lats = lat_ones

    GACPODReader.return_value.get_lonlat.side_effect = fill_lonlat
    GACPODReader.return_value.get_times.return_value = acq_ones
    res = fh.get_dataset(key, info)
    np.testing.assert_allclose(res.data, lon_ones)
    self.assertEqual(res.attrs['unit'], 'degrees_east')
    self.assertIs(res.coords['acq_time'].data, acq_ones)

    # Latitude: coordinates are now cached, so get_lonlat() isn't called again
    key = DatasetID('latitude')
    info = {'name': 'latitude', 'unit': 'degrees_north'}
    res = fh.get_dataset(key, info)
    np.testing.assert_allclose(res.data, lat_ones)
    self.assertEqual(res.attrs['unit'], 'degrees_north')
    self.assertIs(res.coords['acq_time'].data, acq_ones)
    GACPODReader.return_value.get_lonlat.assert_called_once()

def run(self):
    """Start waiting for messages.

    On message arrival, read the image, scale down to the defined
    sizes and add coastlines.
    """
    while self._loop:
        # Wait for new messages
        try:
            msg = self.listener.output_queue.get(True, 5)
        except KeyboardInterrupt:
            self.stop()
            raise
        except queue_empty:
            continue

        logging.info("New message with topic %s", msg.subject)

        self.subject = msg.subject
        self.filepath = urlparse(msg.data["uri"]).path

        try:
            self._update_current_config()
        except (NoOptionError, NoSectionError):
            logging.warning("Skip processing for this message.")
            continue

        self.time_name = self._get_time_name(msg.data)
        # Adjust in_pattern and out_pattern to match this time_name
        self.in_pattern = adjust_pattern_time_name(self.in_pattern,
                                                   self.time_name)
        self.out_pattern = adjust_pattern_time_name(self.out_pattern,
                                                    self.time_name)

        # Parse filename parts from the incoming file
        try:
            self.fileparts = parse(self.in_pattern,
                                   os.path.basename(self.filepath))
        except ValueError:
            logging.info("Filepattern doesn't match, skipping.")
            logging.debug("in_pattern: %s", self.in_pattern)
            logging.debug("fname: %s", os.path.basename(self.filepath))
            continue
        self.fileparts['areaname'] = self.areaname
        self._tidy_platform_name()
        self.time_slot = msg.data[self.time_name]
        existing_fname_parts = self._check_existing(msg.data[self.time_name])

        # There is already a matching image which isn't going to be updated
        if existing_fname_parts is None:
            continue
        self.existing_fname_parts = existing_fname_parts

        # Read the image
        img = read_image(self.filepath)
        if img is None:
            logging.error("Could not read image %s", self.filepath)
            continue

        # Add overlays, if any
        img = self.add_overlays(img)

        # Save image(s)
        self.save_images(img)

        # Delete the obsolete image object
        del img

        # Run garbage collection if configured
        self._gc()

def test_ImageScaler(self, cwriter, listener):
    scaler = sca.ImageScaler(self.config)
    scaler.subject = '/scaler'
    filename = '201702071200_Meteosat-10_EPSG4326_spam.png'
    filename = os.path.join(os.path.dirname(__file__), 'data', filename)

    res = scaler._get_conf_with_default('areaname')
    self.assertTrue(res == self.config.get('/scaler', 'areaname'))

    res = scaler._get_bool('only_backup')
    self.assertTrue(res == sca.DEFAULT_CONFIG_VALUES['only_backup'])
    res = scaler._get_bool('out_dir')
    self.assertFalse(res)

    scaler._get_text_settings()
    self.assertTrue(
        scaler.text_pattern == sca.DEFAULT_CONFIG_VALUES['text_pattern'])
    self.assertTrue(isinstance(scaler.text_settings, dict))

    scaler.subject = '/empty/text/settings'
    with self.assertRaises(KeyError):
        scaler._get_mandatory_config_items()
    scaler.subject = '/not/existing'
    with self.assertRaises(KeyError):
        scaler._get_mandatory_config_items()

    scaler.subject = '/scaler'
    scaler._get_mandatory_config_items()
    self.assertTrue(scaler.areaname == self.config.get('/scaler', 'areaname'))
    self.assertTrue(scaler.in_pattern ==
                    self.config.get('/scaler', 'in_pattern'))
    self.assertTrue(scaler.out_pattern ==
                    self.config.get('/scaler', 'out_pattern'))

    scaler.fileparts.update(parse(scaler.out_pattern,
                                  os.path.basename(filename)))
    scaler._tidy_platform_name()
    self.assertTrue(scaler.fileparts['platform_name'] == "Meteosat10")

    scaler._update_current_config()
    # Test a few config items that have the default values
    self.assertEqual(scaler.timeliness,
                     sca.DEFAULT_CONFIG_VALUES['timeliness'])
    self.assertEqual(len(scaler.tags), len(sca.DEFAULT_CONFIG_VALUES['tags']))
    # And the config values
    self.assertTrue(scaler.areaname == self.config.get('/scaler', 'areaname'))
    self.assertTrue(scaler.in_pattern ==
                    self.config.get('/scaler', 'in_pattern'))
    self.assertTrue(scaler.out_pattern ==
                    self.config.get('/scaler', 'out_pattern'))

    scaler._parse_crops()
    self.assertEqual(len(scaler.crops), 0)
    scaler._parse_sizes()
    self.assertEqual(len(scaler.sizes), 0)
    scaler._parse_tags()
    self.assertEqual(len(scaler.tags), 0)

    scaler.subject = '/crops/sizes/tags'
    scaler._update_current_config()
    scaler._parse_crops()
    self.assertEqual(len(scaler.crops), 2)
    self.assertEqual(len(scaler.crops[0]), 4)
    self.assertTrue(scaler.crops[1] is None)
    scaler._parse_sizes()
    self.assertEqual(len(scaler.sizes), 3)
    self.assertEqual(len(scaler.sizes[0]), 2)
    scaler._parse_tags()
    self.assertEqual(len(scaler.tags), 3)

    # Default text settings (black on white)
    res = scaler._add_text(self.img_l.copy(), 'PL')
    self.assertTrue(res.mode == 'L')
    res = scaler._add_text(self.img_la.copy(), 'PL')
    self.assertTrue(res.mode == 'LA')
    res = scaler._add_text(self.img_rgb.copy(), 'PL')
    self.assertTrue(res.mode == 'RGB')
    res = scaler._add_text(self.img_rgba.copy(), 'PL')
    self.assertTrue(res.mode == 'RGBA')

    scaler.fileparts.update(parse(scaler.out_pattern,
                                  os.path.basename(filename)))
    tslot = dt.datetime.utcnow()
    # File that doesn't exist
    res = scaler._check_existing(tslot)
    self.assertEqual(len(res), 0)

    # Existing file with "is_backup" set to False, so we should get a
    # full set of metadata
    scaler.out_dir = os.path.join(os.path.dirname(__file__), 'data')
    tslot = scaler.fileparts['time']
    res = scaler._check_existing(tslot)
    self.assertEqual(res['time'], tslot)
    self.assertEqual(res['areaname'], scaler.areaname)
    self.assertEqual(res['platform_name'], scaler.fileparts['platform_name'])
    self.assertEqual(res['composite'], 'spam')

    # Existing file with "is_backup" set to True
    scaler.is_backup = True
    res = scaler._check_existing(tslot)
    self.assertIsNone(res)

def run(self, product_config, msg, config_dir):
    """Process the data."""
    LOGGER.info('New data available: type = %s', msg.type)
    self._data_ok = True
    self.set_config(product_config)
    self.layout_handler = LayoutHandler(product_config, config_dir)

    if msg.type in ['dataset']:
        geo_img = None
        for ds_proc in self.dataset_processors:
            if re.match(ds_proc['msg_subject_pattern'], msg.subject):
                vps = ds_proc.get('var_parse', [])
                if not isinstance(vps, list):
                    vps = [vps]
                for vp in vps:
                    new_vals = parse(vp['parse_pattern'],
                                     msg.data[vp['msg_key']])
                    msg.data.update(new_vals)
                proc_func_params = ds_proc.get('processing_function_params',
                                               None)
                module_name, function_name = \
                    ds_proc['processing_function'].split('|')
                func = get_custom_function(module_name, function_name)
                geo_img = func(msg, proc_func_params)
                in_filename_base = ds_proc['output_name']
                in_filename = None
                break
        if geo_img is None:
            LOGGER.warning("No image created by dataset processor")
    else:
        geo_img = None
        LOGGER.info('uri: %s', msg.data['uri'])
        p = urlparse(msg.data['uri'])
        if p.netloc != '':
            LOGGER.error('uri not supported: %s', msg.data['uri'])
            return
        in_filename = p.path
        in_filename_base = os.path.basename(in_filename)

    rules_to_apply = []
    rules_to_apply_groups = set()
    copy_src_file_only = True
    # Find matching rules
    for rule in self.rules:
        pattern = rule['input_pattern']
        if re.match(pattern, in_filename_base):
            # if fnmatch(in_filename_base, pattern):
            if 'rule_group' in rule:
                rg = rule['rule_group']
                if rg in rules_to_apply_groups:
                    continue
                else:
                    rules_to_apply_groups.add(rg)
            LOGGER.info("Rule match (%s)", rule)
            rules_to_apply.append(rule)
            if rule.get('copySrcFileOnly',
                        'false').lower() not in ["true", "yes", "1"]:
                copy_src_file_only = False

    if len(rules_to_apply) > 0:
        t1a = time.time()
        # Load the target area definition
        area = get_area_def(msg.data['area']['name'])

        # ... and apply each rule
        for rule in rules_to_apply:
            params = self.merge_and_resolve_parameters(msg, rule)
            time_name = rule.get('time_name', 'time_eos')
            timeslot = params.get(time_name)
            if not isinstance(timeslot, dt.date):
                timeslot = dt.datetime.strptime(timeslot, "%Y%m%d%H%M%S")

            # Load the image only when necessary
            if geo_img is None:
                if not copy_src_file_only:
                    geo_img = read_image(in_filename, area, timeslot)

            box_out_dir = self.out_boxes[rule['out_box_ref']]['output_dir']
            fname_pattern = rule['dest_filename']
            fname = self.create_filename(fname_pattern, box_out_dir, params)

            if not os.path.exists(self.rrd_dir):
                os.makedirs(self.rrd_dir)
            base_rrd_fname = rule.get(
                'rrd_filename', os.path.basename(fname_pattern) + ".rrd")
            rrd_fname = self.create_filename(
                re.sub(r"\{[^\}]*:\%[^\}]*\}", "xx", base_rrd_fname),
                self.rrd_dir, params)
            rrd_steps = int(rule.get('rrd_steps', '900'))

            # TODO: layouting etc.
            # try:
            #     self.layout_handler.layout(geo_img, area)
            # except ValueError as e:
            #     LOGGER.error("Layouting failed: " + str(e))

            if rule.get('copySrcFileOnly',
                        'false').lower() in ["true", "yes", "1"]:
                # Copy the input file only
                rule_geo_img = None
            else:
                rule_geo_img = geo_img

            self.writer.write(self.save_img, rule_geo_img, in_filename,
                              fname, rrd_fname, rrd_steps, timeslot, params)
            LOGGER.info('pr %.1f s', (time.time() - t1a))

        # Wait for the writer to finish
        if self._data_ok:
            LOGGER.debug("Waiting for the files to be saved")
        self.writer.prod_queue.join()
        if self._data_ok:
            LOGGER.debug("All files saved")
            LOGGER.info('File %s processed in %.1f s',
                        in_filename, time.time() - t1a)
        if not self._data_ok:
            LOGGER.warning("File %s not processed due to "
                           "incomplete/missing/corrupted data.",
                           msg.data['product_filename'])
    else:
        LOGGER.info("no matching rule found for %s", in_filename)

def _get_store_name_from_filename(config, fname):
    """Parse store name from filename."""
    file_pattern = config["file_pattern"]
    file_parts = trollsift.parse(file_pattern, os.path.basename(fname))
    layer_id = file_parts[config["layer_id"]]
    return config["layers"][layer_id]

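# Hedged example of the config layout this helper assumes (all keys and
# values below are invented for illustration):
#
#     config = {"file_pattern": "{layer}_{start_time:%Y%m%d%H%M}.tif",
#               "layer_id": "layer",
#               "layers": {"airmass": "airmass_store"}}
#     _get_store_name_from_filename(config, "/tmp/airmass_201702071200.tif")
#     # -> "airmass_store"
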