def database_save(self, txn):
    """Save this product to the database"""
    table = "mcd" if self.afos == "SWOMCD" else "mpd"
    # Remove any previous entries
    sql = f"DELETE from {table} where product_id = %s and num = %s"
    txn.execute(sql, (self.get_product_id(), self.discussion_num))
    if txn.rowcount > 0:
        LOG.info(
            "mcd.database_save %s %s removed %s entries",
            self.get_product_id(),
            self.discussion_num,
            txn.rowcount,
        )
    giswkt = "SRID=4326;%s" % (self.geometry.wkt,)
    sql = (
        f"INSERT into {table} (product, product_id, geom, issue, expire, "
        "num, year, watch_confidence, concerning) "
        "values (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )
    args = (
        self.text,
        self.get_product_id(),
        giswkt,
        self.sts,
        self.ets,
        self.discussion_num,
        self.valid.year,
        self.find_watch_probability(),
        self.concerning,
    )
    txn.execute(sql, args)

def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None): """ constructor """ TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider) # Hold our parsing results as an array of dicts self.data = [] self.regime = None # Sometimes, we get products that are not really in CLI format but # are RER (record event reports) with a CLI AWIPS ID if self.wmo[:2] != "CD": LOG.info("Product %s skipped due to wrong header", self.get_product_id()) return for section in self.find_sections(): if not HEADLINE_RE.findall(section.replace("\n", " ")): continue # We have meat! self.compute_diction(section) valid, station = self.parse_cli_headline(section) data = self.parse_data(section) self.data.append( dict( cli_valid=valid, cli_station=station, db_station=None, data=data, ))
def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None): """Constructor Args: text (string): the raw PTS product that is to be parsed utcnow (datetime, optional): in case of ambuigity with time ugc_provider (dict, optional): unused in this class nwsli_provider (dict, optional): unused in this class """ TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider) LOG.info("==== SPCPTS Processing: %s", self.get_product_id()) load_conus_data(self.valid) self.issue = None self.expire = None self.day = None self.outlook_type = None self.outlook_collections = dict() self.set_metadata() self.find_issue_expire() self.find_outlooks() self.quality_control()
def contime(s):
    """Process a string into time."""
    if len(re.findall("0000*T", s)) > 0:
        return None
    try:
        ts = datetime.strptime(s, "%y%m%dT%H%MZ")
        return ts.replace(tzinfo=timezone.utc)
    except Exception as err:
        LOG.exception(err)
        return None

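# A minimal usage sketch (not part of the original module), exercising
# contime() above with hypothetical VTEC-style timestamp strings; it assumes
# the module's datetime/timezone imports are in scope.
def _demo_contime():
    # A well-formed timestamp parses to a timezone-aware UTC datetime
    print(contime("210312T1530Z"))  # 2021-03-12 15:30:00+00:00
    # The all-zeros "undefined" time yields None
    print(contime("000000T0000Z"))  # None
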
def setup_syslog():
    """Setup how we want syslogging to work"""
    # https://stackoverflow.com/questions/13699283
    frame = inspect.stack()[-1]
    module = inspect.getmodule(frame[0])
    filename = os.path.basename(module.__file__)
    syslog.startLogging(prefix="pyWWA/%s" % (filename,), facility=LOG_LOCAL2)
    # pyIEM does logging via python stdlib logging, so we need to patch those
    # messages into twisted's logger.
    LOG.addHandler(logging.StreamHandler(stream=log.logfile))
    # Allow for more verbosity when we are running this manually.
    LOG.setLevel(logging.DEBUG if sys.stdout.isatty() else logging.INFO)

def init_projection(self):
    """Setup Grid and projection details"""
    if self.metadata["map_projection"] == 3:
        self.init_llc()
    elif self.metadata["map_projection"] == 1:
        self.init_mercator()
    elif self.metadata["map_projection"] == 5:
        self.init_stereo()
    else:
        LOG.info("Unknown Projection: %s", self.metadata["map_projection"])

def do_sql_observed(self, cursor, _hml): """Process the observed portion of the dataset""" ob = _hml.data["observed"] if ob["dataframe"] is None: return df = ob["dataframe"] if df.empty: return for col in ["primary", "secondary"]: if ob[col + "Name"] is None: continue key = "%s[%s]" % (ob[col + "Name"], ob[col + "Units"]) # Check that we have some non-null data df2 = df[pd.notnull(df[col])] if df2.empty: continue minvalid = df2["valid"].min() maxvalid = df2["valid"].max() cursor.execute( """ DELETE from hml_observed_data WHERE station = %s and valid >= %s and valid <= %s and key = get_hml_observed_key(%s) """, (_hml.station, minvalid, maxvalid, key), ) for _, row in df2.iterrows(): val = row[col] if val is None: continue cursor.execute( ("INSERT into hml_observed_data " "(station, valid, key, value) " "VALUES (%s, %s, get_hml_observed_key(%s), %s) " "RETURNING key"), (_hml.station, row["valid"], key, val), ) if cursor.fetchone()[0] is not None: continue # Delete the bad row cursor.execute( "DELETE from hml_observed_data WHERE station = %s and " "valid = %s and key is null", (_hml.station, row["valid"]), ) # Need to create a new unit! cursor.execute( "INSERT into hml_observed_keys(id, label) VALUES (" "(SELECT coalesce(max(id) + 1, 0) from hml_observed_keys)," "%s) RETURNING id", (key, ), ) LOG.info("Created key %s for %s", cursor.fetchone()[0], key)
def _resent_match(prod, txn, warning_table, vtec):
    """Check if this is a resent match."""
    txn.execute(
        f"SELECT max(updated) as maxtime from {warning_table} "
        "WHERE eventid = %s and significance = %s and wfo = %s and "
        "phenomena = %s",
        (vtec.etn, vtec.significance, vtec.office, vtec.phenomena),
    )
    maxtime = txn.fetchone()["maxtime"]
    if maxtime is not None and maxtime == prod.valid:
        LOG.info("RESENT Match, skipping SQL for %s!", prod.get_product_id())
        return True
    return False

def str2multipolygon(s):
    """Convert string PTS data into a polygon.

    Args:
      s (str): the cryptic string that we attempt to make valid polygons from
    """
    segments = get_segments_from_text(s)
    # Simple case whereby the segment is its own circle, thank goodness
    if len(segments) == 1:
        res = look_for_closed_polygon(segments[0])
        if res:
            return res

    # Keep track of generated polygons
    polys = []
    # currentpoly is our present subject of interest
    currentpoly = copy.deepcopy(CONUS["poly"])
    for i, segment in enumerate(segments):
        # debug_draw(segment, currentpoly)
        LOG.info(
            "  Iterate: %s/%s, len(segment): %s (%.2f %.2f) (%.2f %.2f)",
            i + 1,
            len(segments),
            len(segment),
            segment[0][0],
            segment[0][1],
            segment[-1][0],
            segment[-1][1],
        )
        currentpoly = segment_logic(segment, currentpoly, polys)
    polys.append(currentpoly)

    res = []
    LOG.info(
        "  Resulted in len(polys): %s, now quality controlling", len(polys)
    )
    for i, poly in enumerate(polys):
        if not poly.is_valid:
            LOG.info("    ERROR: polygon %s is invalid!", i)
            continue
        if poly.area == CONUS["poly"].area:
            LOG.info("    polygon %s is just CONUS, skipping", i)
            continue
        LOG.info("    polygon: %s has area: %s", i, poly.area)
        res.append(poly)
    if not res:
        raise Exception(
            "Processed no geometries, this is a bug!\n"
            "  s is %s\n"
            "  segments is %s" % (repr(s), repr(segments))
        )
    return MultiPolygon(res)

def contime(text):
    """Convert text into a UTC datetime."""
    # The 0000 is the standard VTEC undefined time
    if text.startswith("0000"):
        return None
    try:
        ts = datetime.strptime(text, "%y%m%dT%H%MZ")
    except Exception as err:
        LOG.exception(err)
        return None
    # NWS has a bug sometimes whereby 1969 or 1970s timestamps are emitted
    if ts.year < 1971:
        return None
    return ts.replace(tzinfo=timezone.utc)

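# A hedged sketch (not in the original source) of this variant's extra guard:
# besides the "0000" undefined time, sub-1971 years are rejected per the NWS
# epoch bug noted above. The sample strings are hypothetical.
def _demo_contime_guard():
    print(contime("700101T0000Z"))  # None, 1970 trips the year < 1971 check
    print(contime("990401T1200Z"))  # 1999-04-01 12:00:00+00:00
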
def convert_key(text):
    """Convert a key value to something we store"""
    if text is None:
        return None
    if text == "YESTERDAY":
        return "today"
    if text == "TODAY":
        return "today"
    if text == "MONTH TO DATE":
        return "month"
    if text.startswith("SINCE "):
        return text.replace("SINCE ", "").replace(" ", "").lower()
    LOG.info("convert_key() failed for |%s|", text)
    return "fail"

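# A small usage sketch (an assumption, not library code) showing the mapping;
# note that both YESTERDAY and TODAY collapse to "today" as written above.
def _demo_convert_key():
    print(convert_key("YESTERDAY"))        # "today"
    print(convert_key("MONTH TO DATE"))    # "month"
    print(convert_key("SINCE JANUARY 1"))  # "january1"
    print(convert_key("BOGUS"))            # "fail", with a LOG.info complaint
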
def get_folders(drive):
    """Return a dict of Google Drive Folders"""
    f = {}

    # Whoa, just because maxResults=999 and the returned item count is less
    # than 999, it does not mean the list was complete
    folders = (
        drive.files()
        .list(
            q="mimeType = 'application/vnd.google-apps.folder'",
            maxResults=999,
        )
        .execute()
    )
    folder_list = folders["items"]
    i = 0
    while "nextPageToken" in folders:
        folders = (
            drive.files()
            .list(
                pageToken=folders["nextPageToken"],
                q="mimeType = 'application/vnd.google-apps.folder'",
                maxResults=999,
            )
            .execute()
        )
        folder_list = folder_list + folders["items"]
        i += 1
        if i > 10:
            LOG.info("get_folders iterator reached 10, aborting")
            break
    for _, item in enumerate(folder_list):
        f[item["id"]] = dict(title=item["title"], parents=[], basefolder=None)
        for parent in item["parents"]:
            f[item["id"]]["parents"].append(parent["id"])

    for thisfolder in f:
        # title = f[thisfolder]['title']
        if not f[thisfolder]["parents"]:
            continue
        parentfolder = f[thisfolder]["parents"][0]
        if parentfolder not in f:
            LOG.info("ERROR: parentfolder: %s not in f", parentfolder)
            continue
        while parentfolder in f and len(f[parentfolder]["parents"]) > 0:
            parentfolder = f[parentfolder]["parents"][0]
        f[thisfolder]["basefolder"] = parentfolder
    return f

def cleanvalue(val):
    """cleanup the mess that is found in the Google Sheets for values

    Args:
      val (str): The value to clean up

    Returns:
      the cleaned value!
    """
    if val is None or val.strip() == "":
        return None
    if NUMBER_RE.match(val):
        return float(val)
    if CLEANVALUE_XREF.get(val):
        return CLEANVALUE_XREF[val]
    if val.lower() in [
        "did not collect",
        ".",
        "n/a",
        "clay",
        "silty clay",
        "silty clay loam",
        "clay loam",
        "sandy clay loam",
        "silt loam",
        "silty loam",
        "sandy loam",
        "sandy clay",
        "sand",
        "loam",
        "silt",
        "loamy sand",
    ]:
        return val.lower()
    if val.find("%") > -1:
        val = val.replace("%", "")
        if NUMBER_RE.match(val):
            return float(val)
    if val.find("<") > -1:
        return "< %s" % (val.replace("<", "").strip(),)
    if val not in CLEANVALUE_COMPLAINED:
        LOG.info(
            "cscap_utils.cleanvalue(%s) is unaccounted for, return None",
            repr(val),
        )
        CLEANVALUE_COMPLAINED.append(val)
    return None

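# A hedged usage sketch (not part of cscap_utils) illustrating the main
# branches above, under the assumption that NUMBER_RE matches plain decimal
# numbers like "45".
def _demo_cleanvalue():
    print(cleanvalue("45"))         # 45.0, numeric values come back as float
    print(cleanvalue("45%"))        # 45.0, percent sign stripped first
    print(cleanvalue("< 0.2"))      # "< 0.2", normalized less-than string
    print(cleanvalue("Silt Loam"))  # "silt loam", a known soil texture
    print(cleanvalue("  "))         # None, blank input
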
def init_llc(self):
    """Initialize Lambert Conformal Conic projection"""
    self.metadata["proj"] = pyproj.Proj(
        proj="lcc",
        lat_0=self.metadata["latin"],
        lat_1=self.metadata["latin"],
        lat_2=self.metadata["latin"],
        lon_0=self.metadata["lov"],
        a=6371200.0,
        b=6371200.0,
    )

    # s = 1.0
    # if self.metadata['proj_center_flag'] != 0:
    #     s = -1.0
    psi = M_PI_2 - abs(math.radians(self.metadata["latin"]))
    cos_psi = math.cos(psi)
    # r_E = RE_METERS / cos_psi
    alpha = math.pow(math.tan(psi / 2.0), cos_psi) / math.sin(psi)

    x0, y0 = self.metadata["proj"](
        self.metadata["lon1"], self.metadata["lat1"]
    )
    self.metadata["x0"] = x0
    self.metadata["y0"] = y0
    # self.metadata['dx'] *= alpha
    # self.metadata['dy'] *= alpha
    self.metadata["y1"] = y0 + (self.metadata["dy"] * self.metadata["ny"])

    (self.metadata["lon_ul"], self.metadata["lat_ul"]) = self.metadata[
        "proj"
    ](self.metadata["x0"], self.metadata["y1"], inverse=True)

    LOG.info(
        "lat1: %.5f y0: %.5f y1: %.5f lat_ul: %.3f "
        "lat_ur: %.3f lon_ur: %.3f alpha: %.5f dy: %.3f",
        self.metadata["lat1"],
        y0,
        self.metadata["y1"],
        self.metadata["lat_ul"],
        self.metadata["lat_ur"],
        self.metadata["lon_ur"],
        alpha,
        self.metadata["dy"],
    )

def parser(msg, call_id, add_metar=False):
    """Parse the message (single line) into a dict

    Args:
      msg (str): the single line of data to parse into a dict
      call_id (str): hard coded call_id as the data can't be trusted, sigh
      add_metar (bool, optional): should a METAR be generated? Default: False

    Returns:
      dict or None
    """
    match = DS3505_RE.match(msg)
    if not match:
        return
    data = match.groupdict()
    # Seems like these obs with this flag are 'bad'
    if data["srcflag"] in ["A", "B"]:
        return
    data["valid"] = datetime.strptime(
        "%s %s" % (data["yyyymmdd"], data["hhmi"]), "%Y%m%d %H%M"
    ).replace(tzinfo=timezone.utc)
    data["call_id"] = call_id
    data["lat"] = _d1000(data["lat"])
    data["lon"] = _d1000(data["lon"])
    data["wind_speed_mps"] = _d10(data["wind_speed_mps"])
    data["airtemp_c"] = _d10(data["airtemp_c"])
    data["dewpointtemp_c"] = _d10(data["dewpointtemp_c"])
    data["mslp_hpa"] = _d10(data["mslp_hpa"])
    for elem in ["drct", "ceiling_m", "vsby_m", "elevation"]:
        data[elem] = _tonumeric(data[elem])
    data["extra"] = {}
    try:
        parse_extra(data, msg[105:])
    except Exception:
        pass
    if add_metar:
        try:
            gen_metar(data)
        except Exception:
            LOG.info(
                json.dumps(data, indent=True, sort_keys=True, default=str)
            )
            raise
    return data

def init_stereo(self):
    """Compute Polar Stereographic"""
    self.metadata["proj"] = pyproj.Proj(
        proj="stere",
        lat_ts=60,
        lat_0=90,
        lon_0=self.metadata["lov"],
        x_0=0,
        y_0=0,
        a=6371200.0,
        b=6371200.0,
    )
    # First point!
    x0, y0 = self.metadata["proj"](
        self.metadata["lon1"], self.metadata["lat1"]
    )
    self.metadata["x0"] = x0
    self.metadata["y0"] = y0

    self.metadata["y1"] = y0 + (self.metadata["dy"] * self.metadata["ny"])
    (self.metadata["lon_ul"], self.metadata["lat_ul"]) = self.metadata[
        "proj"
    ](x0, self.metadata["y1"], inverse=True)

    LOG.info(
        "lon_ul: %.2f lat_ul: %.2f "
        "lon_ll: %.2f lat_ll: %.2f "
        "lov: %.2f latin: %.2f lat1: %.2f lat2: %.2f "
        "y0: %.5f y1: %.5f dx: %.3f dy: %.3f",
        self.metadata["lon_ul"],
        self.metadata["lat_ul"],
        self.metadata["lon1"],
        self.metadata["lat1"],
        self.metadata["lov"],
        self.metadata["latin"],
        self.metadata["lat1"],
        self.metadata["lat2"],
        y0,
        self.metadata["y1"],
        self.metadata["dx"],
        self.metadata["dy"],
    )

def draw_outlooks(self):
    """For debugging, draw the outlooks on a simple map for inspection!"""
    from descartes.patch import PolygonPatch
    import matplotlib.pyplot as plt

    for day, collect in self.outlook_collections.items():
        for outlook in collect.outlooks:
            fig = plt.figure(figsize=(12, 8))
            ax = fig.add_subplot(111)
            # pylint: disable=unsubscriptable-object
            ax.plot(
                CONUS["line"][:, 0],
                CONUS["line"][:, 1],
                color="b",
                label="Conus",
            )
            for poly in outlook.geometry:
                patch = PolygonPatch(
                    poly,
                    fc="tan",
                    label="Outlook %.1f" % (poly.area,),
                    zorder=2,
                )
                ax.add_patch(patch)
                ax.plot(
                    poly.exterior.xy[0],
                    poly.exterior.xy[1],
                    lw=2,
                    color="r",
                )
            ax.set_title(
                "Day %s Category %s Threshold %s"
                % (day, outlook.category, outlook.threshold)
            )
            ax.legend(loc=3)
            fn = (
                "/tmp/%s_%s_%s_%s.png"
                % (
                    day,
                    self.issue.strftime("%Y%m%d%H%M"),
                    outlook.category,
                    outlook.threshold,
                )
            ).replace(" ", "_")
            LOG.info(":: creating plot %s", fn)
            fig.savefig(fn)
            del fig
            del ax

def look_for_closed_polygon(segment):
    """Simple logic to see if our polygon is already closed."""
    if segment[0][0] == segment[-1][0] and segment[0][1] == segment[-1][1]:
        LOG.info("Single closed polygon found, done and done")
        return MultiPolygon([Polygon(segment)])

    # Slightly bad line-work, whereby the start and end points are very close
    # to each other
    if (
        (segment[0][0] - segment[-1][0]) ** 2
        + (segment[0][1] - segment[-1][1]) ** 2
    ) ** 0.5 < 0.05:
        LOG.info(
            "assuming linework error, begin: (%.2f %.2f) end: (%.2f %.2f)",
            segment[0][0],
            segment[0][1],
            segment[-1][0],
            segment[-1][1],
        )
        segment[-1] = segment[0]
        return MultiPolygon([Polygon(segment)])

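# A hedged sketch (not library code) of the two closure paths above, using
# the shapely geometries already imported by this module.
def _demo_look_for_closed_polygon():
    # Exactly closed: first and last vertices match
    print(look_for_closed_polygon([(0, 0), (2, 0), (2, 2), (0, 2), (0, 0)]))
    # Nearly closed: endpoints within 0.05 degrees get snapped together
    print(look_for_closed_polygon([[0, 0], [2, 0], [2, 2], [0, 2], [0, 0.01]]))
    # An open segment falls through both checks and returns None
    print(look_for_closed_polygon([(0, 0), (2, 0), (2, 2)]))
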
def process_latlon(self):
    """Parse the segment looking for the 'standard' LAT...LON encoding"""
    data = self.unixtext.replace("\n", " ")
    search = LAT_LON_PREFIX.search(data)
    if search is None:
        return None
    pos = search.start()
    newdata = data[pos + 9:]
    # Go find our next non-digit, non-space character, if we find it, we
    # should truncate our string, this could be improved, I suspect
    search = re.search(r"[^\s0-9]", newdata)
    if search is not None:
        pos2 = search.start()
        newdata = newdata[:pos2]

    poly = str2polygon(newdata)
    if poly is None:
        return None

    # check 0, PGUM polygons are east longitude akrherz/pyIEM#74
    if self.tp.source == "PGUM":
        newpts = [[0 - pt[0], pt[1]] for pt in poly.exterior.coords]
        poly = Polygon(newpts)

    # check 1, is the polygon valid?
    if not poly.is_valid:
        self.tp.warnings.append(
            "LAT...LON polygon is invalid!\n%s" % (poly.exterior.xy,)
        )
        return None
    # check 2, is the exterior ring of the polygon clockwise?
    if poly.exterior.is_ccw:
        # No longer a warning as it was too much noise
        LOG.info(
            "LAT...LON polygon exterior is CCW, reversing\n%s",
            poly.exterior.xy,
        )
        poly = Polygon(
            zip(poly.exterior.xy[0][::-1], poly.exterior.xy[1][::-1])
        )
    self.giswkt = "SRID=4326;%s" % (
        dumps(MultiPolygon([poly]), rounding_precision=6),
    )
    return poly

def init_mercator(self):
    """Compute mercator projection stuff"""
    self.metadata["proj"] = pyproj.Proj(
        proj="merc",
        lat_ts=self.metadata["latin"],
        x_0=0,
        y_0=0,
        a=6371200.0,
        b=6371200.0,
    )
    x0, y0 = self.metadata["proj"](
        self.metadata["lon1"], self.metadata["lat1"]
    )
    self.metadata["x0"] = x0
    self.metadata["y0"] = y0
    x1, y1 = self.metadata["proj"](
        self.metadata["lon2"], self.metadata["lat2"]
    )
    self.metadata["x1"] = x1
    self.metadata["y1"] = y1
    self.metadata["dx"] = (x1 - x0) / self.metadata["nx"]
    self.metadata["dy"] = (y1 - y0) / self.metadata["ny"]
    (self.metadata["lon_ul"], self.metadata["lat_ul"]) = self.metadata[
        "proj"
    ](self.metadata["x0"], self.metadata["y1"], inverse=True)
    LOG.info(
        "latin: %.2f lat_ul: %.3f lon_ul: %.3f "
        "y0: %.5f y1: %.5f dx: %.3f dy: %.3f",
        self.metadata["latin"],
        self.metadata["lat_ul"],
        self.metadata["lon_ul"],
        y0,
        y1,
        self.metadata["dx"],
        self.metadata["dy"],
    )

def get_number(text):
    """Convert a string into a number, preferably a float!"""
    if text is None:
        return None
    text = text.strip()
    if text == "":
        retval = None
    elif text == "MM":
        retval = None
    elif text == "T":
        retval = TRACE_VALUE
    else:
        number = re.findall(r"[\-\+]?\d*\.\d+|[\-\+]?\d+", text)
        if len(number) == 1:
            if text.find(".") > 0:
                retval = float(number[0])
            else:
                retval = int(number[0])
        else:
            LOG.info("get_number() failed for |%s|", text)
            retval = None
    return retval

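# A minimal usage sketch (an assumption, not module code) covering the
# sentinel values handled above; TRACE_VALUE is the module's trace constant.
def _demo_get_number():
    print(get_number("12.5"))  # 12.5 as a float, since a decimal is present
    print(get_number("-3"))    # -3 as an int
    print(get_number("T"))     # TRACE_VALUE, for trace precipitation
    print(get_number("MM"))    # None, the missing-data sentinel
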
def sql(self, txn):
    """Do database work

    Args:
      txn (psycopg2.cursor): database cursor
    """
    for day, collect in self.outlook_collections.items():
        txn.execute(
            """
            DELETE from spc_outlooks where product_issue = %s
            and expire = %s and outlook_type = %s and day = %s
            """,
            (self.valid, self.expire, self.outlook_type, day),
        )
        if txn.rowcount > 0:
            LOG.info(
                "Removed %s previous spc_outlook entries", txn.rowcount
            )

        for outlook in collect.outlooks:
            if outlook.geometry.is_empty:
                continue
            sql = """
                INSERT into spc_outlooks(product_issue, issue, expire,
                threshold, category, day, outlook_type, geom)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
            """
            args = (
                self.valid,
                collect.issue,
                collect.expire,
                outlook.threshold,
                outlook.category,
                collect.day,
                self.outlook_type,
                "SRID=4326;%s" % (outlook.geometry.wkt,),
            )
            txn.execute(sql, args)

def __init__(self, fobj):
    """Create a GNIFile instance with a compressed file object

    Args:
      fobj (file): A fileobject
    """
    fobj.seek(0)
    # WMO HEADER
    self.wmo = (fobj.read(21)).strip().decode("utf-8")
    d = zlib.decompressobj()
    hdata = d.decompress(fobj.read())
    self.metadata = self.read_header(hdata[21:])
    self.init_projection()
    totsz = len(d.unused_data)
    # 5120 value chunks, so we need to be careful!
    sdata = b""
    chunk = b"x\xda"
    i = 0
    for part in d.unused_data.split(b"x\xda"):
        if part == b"" and i == 0:
            continue
        chunk += part
        try:
            sdata += zlib.decompress(chunk)
            i += 1
            totsz -= len(chunk)
            chunk = b"x\xda"
        except Exception:
            chunk += b"x\xda"
    if totsz != 0:
        LOG.info("Totalsize left: %s", totsz)
    # np.frombuffer replaces the deprecated np.fromstring for binary input
    self.data = np.reshape(
        np.frombuffer(sdata, np.int8),
        (self.metadata["numlines"] + 1, self.metadata["linesize"]),
    )

def compute_wfos(self, txn):
    """Figure out which WFOs are impacted by this polygon"""
    for day, collect in self.outlook_collections.items():
        for outlook in collect.outlooks:
            if outlook.geometry.is_empty:
                continue
            sql = """
                select distinct wfo from ugcs WHERE
                st_contains(ST_geomFromEWKT('SRID=4326;%s'), centroid)
                and substr(ugc,3,1) = 'C' and wfo is not null
                and end_ts is null ORDER by wfo ASC
            """ % (outlook.geometry.wkt,)
            txn.execute(sql)
            for row in txn.fetchall():
                outlook.wfos.append(row["wfo"])
            LOG.info(
                "Day: %s Category: %s Threshold: %s #WFOS: %s %s",
                day,
                outlook.category,
                outlook.threshold,
                len(outlook.wfos),
                ",".join(outlook.wfos),
            )

def segment_logic(segment, currentpoly, polys):
    """Our segment parsing logic."""
    if segment[0] == segment[-1] and len(segment) > 2:
        LOG.info("    segment is closed polygon!")
        lr = LinearRing(LineString(segment))
        if not lr.is_ccw:
            LOG.info("    polygon is clockwise (exterior), done.")
            polys.append(currentpoly)
            return Polygon(segment)
        LOG.info("    polygon is CCW (interior), testing intersection")
        if currentpoly.intersection(lr).is_empty:
            LOG.info("    failed intersection with currentpoly, abort")
            return currentpoly
        interiors = [ln for ln in currentpoly.interiors]
        interiors.append(lr)
        newp = Polygon(currentpoly.exterior, interiors)
        if not newp.is_valid:
            LOG.info("    adding interior invalid, buffering")
            newp = newp.buffer(0)
        if newp.is_valid:
            LOG.info(
                "    polygon is interior to currentpoly, area: %.2f ",
                currentpoly.area,
            )
            return newp
        raise Exception(
            "Adding interior polygon resulted in an invalid geometry, "
            "aborting"
        )

    # All open lines need to intersect the CONUS, ensure that happens
    ls = LineString(segment)
    ls = clean_segment(ls)
    if isinstance(ls, MultiLineString):
        for _ls in ls:
            LOG.info("    look out below, recursive we go.")
            currentpoly = segment_logic(_ls.coords, currentpoly, polys)
        return currentpoly
    if ls is None:
        LOG.info("    aborting as clean_segment failed...")
        return currentpoly
    LOG.info(
        "    new segment start: %.4f %.4f end: %.4f %.4f",
        ls.coords[0][0],
        ls.coords[0][1],
        ls.coords[-1][0],
        ls.coords[-1][1],
    )

    # If this line segment does not intersect the current polygon of interest,
    # we should check any previous polygons to see if it intersects it. We
    # could be dealing with invalid ordering in the file, sigh.
    if currentpoly.intersection(ls).is_empty:
        LOG.info("    ls does not intersect currentpoly, looking for match")
        found = False
        for i, poly in enumerate(polys):
            intersect = poly.intersection(ls)
            if intersect.is_empty or isinstance(intersect, MultiLineString):
                continue
            LOG.info(
                "    found previous polygon i:%s area: %.1f that intersects",
                i,
                poly.area,
            )
            found = True
            polys.append(currentpoly)
            currentpoly = polys.pop(i)
            break
        if not found:
            LOG.info("    setting currentpoly back to CONUS")
            polys.append(currentpoly)
            currentpoly = copy.deepcopy(CONUS["poly"])

    # Results in either [currentpoly] or [polya, polyb, ...]
    geomcollect = split(currentpoly, ls)
    if len(geomcollect) > 2:
        LOG.info("    line intersects polygon 3+ times, can't handle")
        return currentpoly
    if len(geomcollect) == 1:
        res = geomcollect.geoms[0]
    else:
        (polya, polyb) = geomcollect.geoms[0], geomcollect.geoms[1]
        # Linear reference our splitter's start and end distance
        startdist = polya.exterior.project(Point(ls.coords[0]))
        enddist = polya.exterior.project(Point(ls.coords[-1]))
        # if the end is further down the line, we want this polygon
        res = polya if enddist > startdist else polyb
    if res.area > 0.01:
        LOG.info("    taking polygon.area = %.4f", res.area)
        return res
    return currentpoly

def quality_control(self):
    """Run some checks against what was parsed"""
    # 1. Do polygons overlap for the same outlook
    LOG.info("==== Running Quality Control Checks")
    for day, collect in self.outlook_collections.items():
        # Everything should be smaller than General Thunder, for conv
        tstm = self.get_outlook("CATEGORICAL", "TSTM", day)
        for outlook in collect.outlooks:
            rewrite = False
            # case of single polygon
            if tstm and len(outlook.geometry) == 1:
                if outlook.geometry.area > tstm.geometry.area:
                    rewrite = True
                    msg = (
                        "Discarding polygon as it is larger than TSTM: "
                        "Day: %s %s %s Area: %.2f TSTM Area: %.2f"
                    ) % (
                        day,
                        outlook.category,
                        outlook.threshold,
                        outlook.geometry.area,
                        tstm.geometry.area,
                    )
                    LOG.info(msg)
                    self.warnings.append(msg)
            # clip polygons to the CONUS
            good_polys = []
            for poly in outlook.geometry:
                intersect = CONUS["poly"].intersection(poly)
                if isinstance(intersect, GeometryCollection):
                    for p in intersect:
                        if isinstance(p, Polygon):
                            good_polys.append(p)
                        else:
                            LOG.info("Discarding %s as not polygon", p)
                else:
                    if isinstance(intersect, Polygon):
                        good_polys.append(intersect)
                    else:
                        LOG.info("Discarding %s as not polygon", intersect)
            outlook.geometry = MultiPolygon(good_polys)

            good_polys = []
            for poly1, poly2 in itertools.permutations(outlook.geometry, 2):
                if poly1.contains(poly2):
                    rewrite = True
                    msg = (
                        "Discarding overlapping exterior polygon: "
                        "Day: %s %s %s Area: %.2f"
                    ) % (day, outlook.category, outlook.threshold, poly1.area)
                    LOG.info(msg)
                    self.warnings.append(msg)
                elif tstm is not None and poly1.area > tstm.geometry.area:
                    rewrite = True
                    msg = (
                        "Discarding polygon as it is larger than TSTM: "
                        "Day: %s %s %s Area: %.2f"
                    ) % (day, outlook.category, outlook.threshold, poly1.area)
                    LOG.info(msg)
                    self.warnings.append(msg)
                else:
                    if poly1 not in good_polys:
                        good_polys.append(poly1)
            if rewrite:
                outlook.geometry = MultiPolygon(good_polys)
    # 2. Do the time bounds make sense, limited scope here
    if (
        self.day == 1
        and (self.issue - self.valid).total_seconds() > 8 * 3600
    ):
        self.warnings.append(
            "time_bounds_check: day: %s issue: %s valid: %s expire: %s"
            % (self.day, self.issue, self.valid, self.expire)
        )

def find_outlooks(self):
    """Find the outlook sections within the text product!"""
    if self.text.find("&&") == -1:
        self.warnings.append("Product contains no &&, adding...")
        self.text = self.text.replace("\n... ", "\n&&\n... ")
        self.text += "\n&& "
    for segment in self.text.split("&&")[:-1]:
        day = self.day
        if day is None:
            day = get_day(segment)
        # We need to figure out the probabilistic or category
        tokens = re.findall(r"\.\.\.\s+(.*)\s+\.\.\.", segment)
        if not tokens:
            continue
        category = tokens[0].strip()
        point_data = {}
        # Now we loop over the lines looking for data
        threshold = None
        for line in segment.split("\n"):
            if (
                re.match(
                    r"^(D[3-8]\-?[3-8]?|EXTM|MRGL|ENH|SLGT|MDT|ELEV|"
                    r"HIGH|CRIT|TSTM|SIGN|IDRT|SDRT|0\.[0-9][0-9]) ",
                    line,
                )
                is not None
            ):
                newthreshold = line.split()[0]
                if threshold is not None and threshold == newthreshold:
                    point_data[threshold] += " 99999999 "
                threshold = newthreshold
            if threshold is None:
                continue
            if threshold not in point_data:
                point_data[threshold] = ""
            point_data[threshold] += line.replace(threshold, " ")

        if day is not None:
            issue, expire = compute_times(
                self.afos, self.issue, self.expire, day
            )
            collect = self.outlook_collections.setdefault(
                day, SPCOutlookCollection(issue, expire, day)
            )
        # We need to duplicate, in the case of day-day spans
        for threshold in list(point_data.keys()):
            if threshold == "TSTM" and self.afos == "PFWF38":
                LOG.info("Failing to parse TSTM in PFWF38")
                del point_data[threshold]
                continue
            match = DMATCH.match(threshold)
            if match:
                data = match.groupdict()
                if data.get("day2") is not None:
                    day1 = int(data["day1"])
                    day2 = int(data["day2"])
                    LOG.info("Duplicating threshold %s-%s", day1, day2)
                    for i in range(day1, day2 + 1):
                        key = "D%s" % (i,)
                        point_data[key] = point_data[threshold]
                    del point_data[threshold]
        for threshold in point_data:
            match = DMATCH.match(threshold)
            if match:
                day = int(match.groupdict()["day1"])
                issue, expire = compute_times(
                    self.afos, self.issue, self.expire, day
                )
                collect = self.outlook_collections.setdefault(
                    day, SPCOutlookCollection(issue, expire, day)
                )
            LOG.info(
                "--> Start Day: %s Category: '%s' Threshold: '%s' =====",
                day,
                category,
                threshold,
            )
            mp = str2multipolygon(point_data[threshold])
            if DMATCH.match(threshold):
                threshold = "0.15"
            LOG.info("----> End threshold is: %s", threshold)
            collect.outlooks.append(SPCOutlook(category, threshold, mp))

def to_iemaccess(self, txn, force_current_log=False, skip_current=False):
    """Persist parsed data to IEMAccess Database.

    Args:
      txn (psycopg2.cursor): database cursor / transaction
      force_current_log (boolean): should this ob always go to current_log
      skip_current (boolean): should this ob always skip current table
    """
    gts = self.time.replace(tzinfo=timezone.utc)
    iem = Observation(self.iemid, self.network, gts)
    # Load the observation from the database, if the same time exists!
    iem.load(txn)

    # Need to figure out if we have a duplicate ob, if so, check
    # the length of the raw data, if greater, take the temps
    if iem.data["raw"] is None or len(iem.data["raw"]) < len(self.code):
        if self.temp:
            val = self.temp.value("F")
            # Place reasonable bounds on the temperature before saving it!
            if val > -90 and val < 150:
                iem.data["tmpf"] = round(val, 1)
        if self.dewpt:
            val = self.dewpt.value("F")
            # Place reasonable bounds on the temperature before saving it!
            if val > -150 and val < 100:
                iem.data["dwpf"] = round(val, 1)
        # Database only allows len 254
        iem.data["raw"] = self.code[:254]
    # Always take a COR
    if self.code.find(" COR ") > -1:
        iem.data["raw"] = self.code[:254]

    wind_logic(iem, self)

    if self.max_temp_6hr:
        iem.data["max_tmpf_6hr"] = round(self.max_temp_6hr.value("F"), 1)
        if self.tzname and _is_same_day(iem.data["valid"], self.tzname):
            iem.data["max_tmpf_cond"] = iem.data["max_tmpf_6hr"]
    if self.min_temp_6hr:
        iem.data["min_tmpf_6hr"] = round(self.min_temp_6hr.value("F"), 1)
        if self.tzname and _is_same_day(iem.data["valid"], self.tzname):
            iem.data["min_tmpf_cond"] = iem.data["min_tmpf_6hr"]
    if self.max_temp_24hr:
        iem.data["max_tmpf_24hr"] = round(self.max_temp_24hr.value("F"), 1)
    if self.min_temp_24hr:
        iem.data["min_tmpf_24hr"] = round(self.min_temp_24hr.value("F"), 1)
    if self.precip_3hr:
        iem.data["p03i"] = trace(self.precip_3hr)
    if self.precip_6hr:
        iem.data["p06i"] = trace(self.precip_6hr)
    if self.precip_24hr:
        iem.data["p24i"] = trace(self.precip_24hr)
    # We assume the value is zero, sad!
    iem.data["phour"] = 0
    if self.precip_1hr:
        iem.data["phour"] = trace(self.precip_1hr)

    if self.snowdepth:
        iem.data["snowd"] = self.snowdepth.value("IN")
    if self.vis:
        iem.data["vsby"] = self.vis.value("SM")
    if self.press:
        iem.data["alti"] = self.press.value("IN")
    if self.press_sea_level:
        iem.data["mslp"] = self.press_sea_level.value("MB")
    if self.press_sea_level and self.press:
        alti = self.press.value("MB")
        mslp = self.press_sea_level.value("MB")
        if abs(alti - mslp) > 25:
            LOG.info(
                "PRESSURE ERROR %s %s ALTI: %s MSLP: %s",
                iem.data["station"],
                iem.data["valid"],
                alti,
                mslp,
            )
            if alti > mslp:
                iem.data["mslp"] += 100.0
            else:
                iem.data["mslp"] -= 100.0
    # Do something with sky coverage
    for i in range(len(self.sky)):
        (cov, hgh, _) = self.sky[i]
        iem.data["skyc%s" % (i + 1)] = cov
        if hgh is not None:
            iem.data["skyl%s" % (i + 1)] = hgh.value("FT")

    # Presentwx
    if self.weather:
        pwx = []
        for wx in self.weather:
            val = "".join([a for a in wx if a is not None])
            if val == "" or val == len(val) * "/":
                continue
            pwx.append(val)
        iem.data["wxcodes"] = pwx

    # Ice Accretion
    for hr in [1, 3, 6]:
        key = "ice_accretion_%shr" % (hr,)
        iem.data[key] = trace(getattr(self, key))
    return iem, iem.save(txn, force_current_log, skip_current)

def to_metar(textprod, text):
    """Create a METAR object, if possible"""
    # Do some cleaning and whitespace trimming
    text = sanitize(text)
    if len(text) < 10:
        return
    attempt = 1
    mtr = None
    original_text = text
    valid = textprod.valid
    while attempt < 6 and mtr is None:
        try:
            mtr = METARReport(text, month=valid.month, year=valid.year)
        except MetarParserError as inst:
            tokens = ERROR_RE.findall(str(inst))
            if tokens:
                if tokens[0] == text or text.startswith(tokens[0]):
                    if not SA_RE.match(text):
                        LOG.info(
                            "%s Aborting due to non-replace %s",
                            textprod.get_product_id(),
                            str(inst),
                        )
                    return
                # So tokens contains a series of groups that needs updated
                newtext = text
                for token in tokens[0].split():
                    newtext = newtext.replace(" %s" % (token,), "")
                if newtext != text:
                    text = newtext
                else:
                    LOG.info("unparsed groups regex fail: %s", inst)
            if str(inst).find("day is out of range for month") > -1:
                if valid.day < 10:
                    valid = valid.replace(day=1) - timedelta(days=1)
        attempt += 1

    if mtr is not None:
        # Attempt to figure out more things
        if mtr.station_id is None:
            LOG.info("Aborting due to station_id being None |%s|", text)
            return None
        if mtr.time is None:
            LOG.info("Aborting due to time being None |%s|", text)
            return None
        # don't allow data more than an hour into the future
        ceiling = (textprod.utcnow + timedelta(hours=1)).replace(tzinfo=None)
        if mtr.time > ceiling:
            # careful, we may have obs from the previous month
            if ceiling.day < 5 and mtr.time.day > 15:
                prevmonth = ceiling - timedelta(days=10)
                mtr.time = mtr.time.replace(
                    year=prevmonth.year, month=prevmonth.month
                )
            else:
                LOG.info(
                    "Aborting due to time in the future "
                    "ceiling: %s mtr.time: %s",
                    ceiling,
                    mtr.time,
                )
                return None
        mtr.code = original_text
        mtr.iemid = (
            mtr.station_id[-3:]
            if mtr.station_id[0] == "K"
            else mtr.station_id
        )
        mtr.network = textprod.nwsli_provider.get(mtr.iemid, dict()).get(
            "network"
        )
        mtr.tzname = textprod.nwsli_provider.get(mtr.iemid, dict()).get(
            "tzname"
        )
    return mtr

def get_jabbers(self, uri, _uri2=None):
    """Make this into jabber messages"""
    jmsgs = []
    for mtr in self.metars:
        msg = None
        for weatheri in mtr.weather:
            for wx in weatheri:
                if wx is not None and "GR" in wx:
                    msg = "Hail"
        if TORNADO_RE.findall(mtr.code):
            msg = "Tornado"
        elif FUNNEL_RE.findall(mtr.code):
            msg = "Funnel Cloud"
        # Search for Peak wind gust info....
        elif mtr.over_wind_threshold():
            _msg = mtr.wind_message()
            if _msg:
                msg = _msg
        elif mtr.station_id in JABBER_SITES:
            # suck
            if JABBER_SITES[mtr.station_id] != mtr.time:
                JABBER_SITES[mtr.station_id] = mtr.time
                channels = ["METAR.%s" % (mtr.station_id,)]
                if mtr.type == "SPECI":
                    channels.append("SPECI.%s" % (mtr.station_id,))
                mstr = "%s %s" % (mtr.type, mtr.code)
                jmsgs.append(
                    [mstr, mstr, dict(channels=",".join(channels))]
                )
        if msg:
            row = self.nwsli_provider.get(mtr.iemid, dict())
            wfo = row.get("wfo")
            if wfo is None or wfo == "":
                LOG.info(
                    "Unknown WFO for id: %s, skipping alert", mtr.iemid
                )
                continue
            channels = ["METAR.%s" % (mtr.station_id,)]
            if mtr.type == "SPECI":
                channels.append("SPECI.%s" % (mtr.station_id,))
            channels.append(wfo)
            st = row.get("state")
            nm = row.get("name")

            extra = ""
            if mtr.code.find("$") > 0:
                extra = "(Caution: Maintenance Check Indicator)"
            url = "%s%s" % (uri, mtr.network)
            jtxt = "%s,%s (%s) ASOS %s reports %s\n%s %s" % (
                nm,
                st,
                mtr.iemid,
                extra,
                msg,
                mtr.code,
                url,
            )
            jhtml = (
                f'<p><a href="{url}">{nm},{st}</a> ({mtr.iemid}) ASOS '
                f"{extra} reports <strong>{msg}</strong>"
                f"<br/>{mtr.code}</p>"
            )
            xtra = {
                "channels": ",".join(channels),
                "lat": str(row.get("lat")),
                "long": str(row.get("lon")),
            }
            xtra["twitter"] = (
                "%s,%s (%s) ASOS reports %s -- %s"
                % (nm, st, mtr.iemid, msg, mtr.code)
            )[:TWEET_CHARS]
            jmsgs.append([jtxt, jhtml, xtra])
    return jmsgs