def workflow(date):
    """Dump one UTC date of AFOS products to LDM via pqinsert.

    For each pil prefix in the module-level ``pils`` string, select that
    day's products, write them (noaaport framed) to a temp file, and
    insert that file into LDM with a product id encoding the date/pil.

    Args:
        date (datetime.date): the UTC date to process.
    """
    sts = utc(date.year, date.month, date.day)
    ets = sts + datetime.timedelta(hours=24)
    for pil in pils.split("|"):
        cursor.execute(
            """
            SELECT data from products WHERE entered >= %s and entered < %s
            and substr(pil,1,3) = %s ORDER by entered ASC
            """,
            (sts, ets, pil),
        )
        if cursor.rowcount == 0:
            continue
        LOG.info("%s %s %s", date, pil, cursor.rowcount)
        with open("/tmp/afos.tmp", "w") as fh:
            for row in cursor:
                fh.write(noaaport_text(row[0]))
        prodid = "data a %s0000 bogus text/noaaport/%s_%s.txt txt" % (
            date.strftime("%Y%m%d"),
            pil,
            date.strftime("%Y%m%d"),
        )
        # Pass argv as a list instead of interpolating into a shell string;
        # avoids quoting pitfalls with shell=True for db-sourced pil values.
        subprocess.call(["pqinsert", "-p", prodid, "/tmp/afos.tmp"])
def main(argv):
    """Rewrite the given file in-place with noaaport text framing.

    Args:
        argv (list): command line arguments; argv[1] is the filename.
    """
    fn = argv[1]
    # Context managers ensure handles close even if processing raises;
    # the original leaked the read handle via open(fn).read().
    with open(fn) as fh:
        data = noaaport_text(fh.read())
    with open(fn, 'w') as output:
        output.write(data)
def main(argv):
    """Rewrite the given file in-place with noaaport text framing.

    Args:
        argv (list): command line arguments; argv[1] is the filename.
    """
    fn = argv[1]
    # Context managers ensure handles close even if processing raises;
    # the original leaked the read handle via open(fn).read().
    with open(fn) as fh:
        data = noaaport_text(fh.read())
    with open(fn, "w") as output:
        output.write(data)
def process(order):
    """ Process this timestamp

    Reingest an extracted archive "order" of raw text products into the
    AFOS database, deleting any identical pre-existing rows first.

    Args:
        order (str): archive filename; first 6 chars are a yymmdd stamp.
    """
    cursor = PGCONN.cursor()
    # The order filename is assumed to start with yymmdd (UTC)
    ts = datetime.datetime.strptime(order[:6], "%y%m%d").replace(
        tzinfo=pytz.utc)
    # Products timestamped outside +/- 2 days of the order date are
    # considered bogus and skipped below.
    base = ts - datetime.timedelta(days=2)
    ceiling = ts + datetime.timedelta(days=2)
    subprocess.call("tar -xzf %s" % (order, ), shell=True)
    inserts = 0
    deletes = 0
    filesparsed = 0
    bad = 0
    # Extracted files look like yymmddHH.* (hour 00-29 glob)
    for fn in glob.glob("%s[0-2][0-9].*" % (order[:6], )):
        # Strip control chars; non-ascii bytes are silently dropped
        content = re.sub(BAD_CHARS, "",
                         open(fn, 'rb').read().decode('ascii', 'ignore'))
        # Now we are getting closer, lets split by the delimter as we
        # may have multiple products in one file!
        for bulletin in content.split("\001"):
            if bulletin == '':
                continue
            try:
                bulletin = noaaport_text(bulletin)
                prod = TextProduct(bulletin, utcnow=ts,
                                   parse_segments=False)
                # Remap legacy source identifiers to current ones
                prod.source = XREF_SOURCE.get(prod.source, prod.source)
            except Exception as exp:
                if DEBUG:
                    print('Parsing Failure %s' % (exp, ))
                bad += 1
                continue
            if prod.valid < base or prod.valid > ceiling:
                # print('Timestamp out of bounds %s %s %s' % (base,
                #       prod.valid, ceiling))
                bad += 1
                continue
            # Tables are partitioned by half-year (Jan-Jun / Jul-Dec)
            table = "products_%s_%s" % (prod.valid.year,
                                        ("0712" if prod.valid.month > 6
                                         else "0106"))
            # Remove exact duplicates before inserting this copy
            cursor.execute(
                """
                DELETE from """ + table + """ WHERE pil = %s and
                entered = %s and source = %s and data = %s
                """,
                (prod.afos, prod.valid, prod.source, bulletin))
            deletes += cursor.rowcount
            cursor.execute(
                """INSERT into """ + table + """
                (data, pil, entered, source, wmo)
                values (%s,%s,%s,%s,%s)
                """,
                (bulletin, prod.afos, prod.valid, prod.source, prod.wmo))
            inserts += 1
        os.unlink(fn)
        filesparsed += 1
    print(("%s Files Parsed: %s Inserts: %s Deletes: %s Bad: %s"
           ) % (order, filesparsed, inserts, deletes, bad))
    cursor.close()
    PGCONN.commit()
def dotable(table):
    """Go main go

    Dedup rows in ``table`` that share (entered, pil, wmo, source): when
    exactly two copies exist and their bodies match past the framing
    header, delete both and reinsert one copy without the trailing ^C.

    Args:
        table (str): name of the products partition table to clean.
    """
    pgconn = get_dbconn('afos')
    cursor = pgconn.cursor()
    # Find keys with more than one row
    df = read_sql("""
    WITH data as (
        SELECT entered, pil, wmo, source, count(*) from """ + table + """
        WHERE source is not null and wmo is not null and pil is not null
        and entered is not null
        GROUP by entered, pil, wmo, source)
    select * from data where count > 1
    """, pgconn, index_col=None)
    hits = 0
    for _, row in tqdm(
            df.iterrows(), total=len(df.index), desc=table,
            disable=False):
        # get text
        cursor.execute("""
        SELECT data from """ + table + """ WHERE source = %s and
        entered = %s and pil = %s and wmo = %s
        """, (row['source'], row['entered'], row['pil'], row['wmo']))
        data = []
        for row2 in cursor:
            data.append(noaaport_text(row2[0]))
        # NOTE(review): data[1] is indexed before len(data) == 2 is
        # checked; safe only because count > 1 guarantees >= 2 rows.
        # Compare past the first 11 chars (noaaport framing/sequence).
        if data[0][11:] == data[1][11:] and len(data) == 2:
            hits += 1
            # delete old entries
            cursor.execute("""
            DELETE from """ + table + """ WHERE source = %s and
            entered = %s and pil = %s and wmo = %s
            """, (row['source'], row['entered'], row['pil'],
                  row['wmo']))
            # insert without trailing ^C
            cursor.execute("""
            INSERT into """ + table + """ (data, pil, entered, source,
            wmo) VALUES (%s, %s, %s, %s, %s)
            """, (
                data[0][:-1], row['pil'], row['entered'], row['source'],
                row['wmo'])
            )
            continue
        # Bodies differ: dump both for manual inspection and bail out
        if data[0][11:] != data[1][11:]:
            o = open('one.txt', 'w')
            o.write(data[0])
            o.close()
            o = open('two.txt', 'w')
            o.write(data[1])
            o.close()
            sys.exit()
    print("%s rows were updated..." % (hits, ))
    cursor.close()
    pgconn.commit()
    pgconn.close()
def main(argv):
    """Append all products for a given PIL to a <pil>.txt file.

    Args:
        argv (list): command line arguments; argv[1] is the AFOS PIL.
    """
    pgconn = psycopg2.connect(database='afos', host='iemdb', port=5555,
                              user='******')
    acursor = pgconn.cursor()
    pil = argv[1]
    acursor.execute("""
        SELECT data, entered from products WHERE pil = %s
        ORDER by entered ASC""", (pil, ))
    # Context manager guarantees the output file is closed on error;
    # the original leaked the handle if noaaport_text raised.
    with open('%s.txt' % (pil, ), 'a') as output:
        for row in acursor:
            output.write(noaaport_text(row[0]))
def main():
    """Go Main Go.

    Dump TJSJ products from a date window into per-product text files
    under the TJSJ/ directory, one file per (pil, timestamp).
    """
    pgconn = get_dbconn('afos')
    acursor = pgconn.cursor('streamer')
    acursor.execute("""
        SELECT pil, entered at time zone 'UTC', source, pil, data
        from products_2017_0712 WHERE source = 'TJSJ' and
        entered >= '2017-08-26' and entered < '2017-09-13'
    """)
    for row in acursor:
        fn = "TJSJ/%s_%s.txt" % (
            row[0].strip(), row[1].strftime("%Y%m%d%H%M"))
        # append: multiple products can land on the same pil+minute
        with open(fn, 'a') as fh:
            fh.write(noaaport_text(row[4]))
            fh.write('\r\r\n\003')
    # BUG fixed: the original called o.close() again after the loop,
    # which raised NameError when the query returned zero rows.
def main():
    """Go Main Go.

    Export the archived_save table one UTC day at a time to
    /mesonet/tmp/offYYYYMMDD.txt files, then gzip each file.
    """
    pgconn = get_dbconn('afos')
    cursor = pgconn.cursor()
    cursor.execute("SET TIME ZONE 'UTC'")
    cursor.execute("""
        SELECT min(date(entered)), max(date(entered)) from archived_save
    """)
    mindate, maxdate = cursor.fetchone()
    for date in pd.date_range(mindate, maxdate, freq='D'):
        cursor.execute("""
            SELECT data from archived_save WHERE entered >= %s
            and entered < %s
        """, (date, date + datetime.timedelta(days=1)))
        fn = '/mesonet/tmp/off%s.txt' % (date.strftime("%Y%m%d"), )
        # with-block closes/flushes the file before gzip runs on it
        with open(fn, 'a') as fp:
            for row in cursor:
                fp.write(noaaport_text(row[0]))
        # list argv avoids shell interpolation of the filename
        subprocess.call(["gzip", fn])
def dotable(table):
    """Backfill the null source column in the given products table.

    Re-parses each sourceless row's text to recover the WMO source and
    updates the row in place, tallying successes and failures.

    Args:
        table (str): name of the products partition table to fix.
    """
    pgconn = get_dbconn('afos')
    cursor = pgconn.cursor()
    cursor2 = pgconn.cursor()
    cursor.execute("""
        SELECT entered, data, pil, wmo from """ + table + """
        WHERE source is null
    """)
    failures = 0
    updated = 0
    noupdates = 0
    for row in cursor:
        product = noaaport_text(row[1])
        try:
            tp = TextProduct(product, utcnow=row[0],
                             parse_segments=False)
        except Exception as exp:
            failures += 1
            # WMO-header failures are common and expected; only surface
            # the unexpected parse errors
            if str(exp).find('Could not parse WMO header!') == -1:
                print(exp)
            continue
        if tp.source is None:
            failures += 1
            continue
        cursor2.execute("""
            UPDATE """ + table + """ SET data = %s, source = %s WHERE
            source is null and entered = %s and pil = %s and wmo = %s
        """, (product, tp.source, row[0], row[2], row[3]))
        if cursor2.rowcount == 0:
            print("Hmmmm")
            # BUG fixed: was `noupdates = 0`, so the counter never grew
            noupdates += 1
        else:
            updated += 1
    print(("%s rows: %s updated: %s failures: %s noupdates: %s"
           ) % (table, cursor.rowcount, updated, failures, noupdates))
    cursor2.close()
    pgconn.commit()
"""Send products from AFOS database to pyWWA""" from tqdm import tqdm from pyiem.util import noaaport_text, get_dbconn AFOS = get_dbconn('afos') acursor = AFOS.cursor() o = open('flood_emergency_2019.txt', 'a') for year in tqdm(range(2019, 2020)): for suffix in ['0106', '0712']: table = "products_%s_%s" % (year, suffix) acursor.execute(""" SELECT data, source, entered from """ + table + """ WHERE entered > '2018-09-14 12:00' and substr(pil, 1, 3) in ('FFW', 'FFS') and data ~* 'EMERGENCY' ORDER by entered ASC """) for row in acursor: raw = " ".join( row[0].upper().replace("\r", "").replace("\n", " ").split()) if raw.find("FLASH FLOOD EMERGENCY") == -1: continue o.write(noaaport_text(row[0])) print(" Hit %s %s" % (row[1], row[2])) o.close()
def process():
    """ Process this timestamp

    Reingest legacy 9957* tar.Z archives of raw text bulletins into the
    AFOS database, deleting prior copies keyed on (pil, entered, source).
    """
    for tarfn in glob.glob("9957*tar.Z"):
        cursor = PGCONN.cursor()
        subprocess.call("uncompress %s" % (tarfn, ), shell=True)
        # Archive filename carries a YYYYMMDD stamp at chars 9:17
        ts = datetime.datetime.strptime(tarfn[9:17], '%Y%m%d')
        ts = ts.replace(hour=23, minute=59, tzinfo=pytz.utc)
        tar = tarfile.open(tarfn[:-2], 'r')
        # Track keys already deleted so repeats in the same archive do
        # not delete the rows we just inserted
        memory = []
        for member in tar.getmembers():
            fobj = tar.extractfile(member)
            content = re.sub(BAD_CHARS, "", fobj.read()) + ENDDELIM
            pos = 0
            good = 0
            bad = 0
            deleted = 0
            # Split the member into bulletins on the DELIMITER pattern
            for match in re.finditer(DELIMITER, content):
                pos1 = match.start()
                bulletin = "000 \r\r" + content[pos:pos1]
                pos = match.end()
                if len(bulletin) < 20:
                    bad += 1
                    continue
                bulletin = noaaport_text(bulletin)
                try:
                    prod = TextProduct(bulletin, utcnow=ts,
                                       parse_segments=False)
                except Exception as exp:
                    bad += 1
                    print('Parsing Failure %s\n%s' % (fobj.name, exp))
                    continue
                if prod.valid.year != ts.year:
                    bad += 1
                    print('Invalid timestamp, year mismatch')
                    continue
                # Half-year partition tables (Jan-Jun / Jul-Dec)
                table = "products_%s_%s" % (prod.valid.year,
                                            ("0712"
                                             if prod.valid.month > 6
                                             else "0106"))
                key = "%s_%s_%s" % (
                    prod.afos, prod.valid.strftime("%Y%m%d%H%M"),
                    prod.source)
                if key not in memory:
                    cursor.execute(
                        """
                        DELETE from """ + table + """ WHERE pil = %s
                        and entered = %s and source = %s
                        """,
                        (prod.afos, prod.valid, prod.source))
                    deleted += cursor.rowcount
                    memory.append(key)
                cursor.execute(
                    """INSERT into """ + table + """
                    (data, pil, entered, source, wmo)
                    values (%s,%s,%s,%s,%s)
                    """,
                    (bulletin, prod.afos, prod.valid, prod.source,
                     prod.wmo))
                good += 1
        subprocess.call("compress %s" % (tarfn[:-2], ), shell=True)
        print(("Processed %s Good: %s Bad: %s Deleted: %s"
               ) % (tarfn, good, bad, deleted))
        # Sanity guard: non-trivial content with almost no parses
        # implies something systemic went wrong; stop the backfill.
        # NOTE(review): content/good here are from the LAST member
        # processed — confirm that is the intended check.
        if len(content) > 1000 and good < 5:
            print("ABORT!")
            sys.exit()
        cursor.close()
        PGCONN.commit()
def test_noaaport_text():
    """noaaport_text should emit SOH + sequence framing at the start."""
    raw = util.get_test_file('WCN.txt')
    processed = util.noaaport_text(raw)
    expected_prefix = "\001\r\r\n098 \r\r\n"
    assert processed[:11] == expected_prefix
def main(argv):
    """Backfill null issue/expire/init_expire times in a warnings table.

    Re-parses each warning's report and follow-up (SVS) products with the
    VTEC parser and reconstructs the issue, expire, and initial expire
    timestamps for the matching UGC/eventid, updating the row by oid.

    Args:
        argv (list): command line arguments; argv[1] selects the
            warnings_<year> table to process.
    """
    pgconn = psycopg2.connect(database='postgis', host='localhost',
                              port=5555)
    cursor = pgconn.cursor()
    cursor2 = pgconn.cursor()
    table = "warnings_%s" % (argv[1],)
    cursor.execute("""
        SELECT oid, ugc, issue at time zone 'UTC',
        expire at time zone 'UTC', init_expire at time zone 'UTC',
        report, svs, phenomena, eventid, significance from
        """ + table + """ where issue is null ORDER by oid ASC
    """)
    print("Found %s entries to process..." % (cursor.rowcount, ))
    for row in cursor:
        oid = row[0]
        ugc = row[1]
        report = row[5]
        if row[6] is None:
            svss = []
        else:
            svss = row[6].split("__")
        phenomena = row[7]
        eventid = row[8]
        significance = row[9]
        # Existing (possibly null) database values, made tz-aware
        issue0 = row[2].replace(
            tzinfo=pytz.timezone("UTC")) if row[2] is not None else None
        expire0 = row[3].replace(
            tzinfo=pytz.timezone("UTC")) if row[3] is not None else None
        init_expire0 = row[4].replace(
            tzinfo=pytz.timezone("UTC")) if row[4] is not None else None
        # Process the initial report first, then each SVS update
        svss.insert(0, report)
        expire1 = None
        issue1 = None
        init_expire1 = None
        msg = []
        print(" Found %s svss to process through" % (len(svss), ))
        for i, svs in enumerate(svss):
            if svs.strip() == '':
                continue
            try:
                prod = parser(noaaport_text(svs))
            # BUG fixed: was the Python 2-only `except Exception, exp`
            except Exception as exp:
                print("%s %s" % (oid, exp))
                if i == 0:
                    print("FATAL ABORT as first product failed")
                    break
                continue
            for segment in prod.segments:
                found = False
                print(segment.ugcs)
                for this_ugc in segment.ugcs:
                    if str(this_ugc) == ugc:
                        found = True
                if not found:
                    print("Did not find %s in segment" % (ugc, ))
                    continue
                for vtec in segment.vtec:
                    if (vtec.phenomena != phenomena or
                            vtec.etn != eventid or
                            vtec.significance != significance):
                        print("skipping segment as it does not match")
                        continue
                    if i == 0:
                        # Seed from the initial report; 144 hours is the
                        # fallback when no explicit end time is coded
                        init_expire1 = (
                            vtec.endts if vtec.endts is not None
                            else prod.valid +
                            datetime.timedelta(hours=144))
                        expire1 = init_expire1
                        issue1 = (vtec.begints
                                  if vtec.begints is not None
                                  else prod.valid)
                    if vtec.begints is not None:
                        if vtec.begints != issue1:
                            msg.append(("%s %s %s %s %s"
                                        ) % ('I', i, ugc, vtec.action,
                                             p(vtec.begints)))
                            issue1 = vtec.begints
                    if vtec.endts is not None:
                        if vtec.endts != expire1:
                            msg.append(("%s %s %s %s %s"
                                        ) % ('E', i, ugc, vtec.action,
                                             p(vtec.endts)))
                            expire1 = vtec.endts
                    # Area extensions restart the issue clock
                    if vtec.action in ['EXA', 'EXB']:
                        issue1 = (prod.valid if vtec.begints is None
                                  else vtec.begints)
                    # Upgrades/cancels end the event at product time
                    if vtec.action in ['UPG', 'CAN']:
                        expire1 = prod.valid
        if issue0 != issue1 or expire0 != expire1:
            print("\n".join(msg))
        if issue0 != issue1:
            print(("%s %s.%s.%s Issue0: %s Issue1: %s"
                   ) % (ugc, phenomena, significance, eventid,
                        p(issue0), p(issue1)))
            cursor2.execute("""UPDATE """ + table + """
                SET issue = %s WHERE oid = %s
                """, (issue1, oid))
        if expire0 != expire1:
            print(("%s %s.%s.%s Expire0: %s Expire1: %s"
                   ) % (ugc, phenomena, significance, eventid,
                        p(expire0), p(expire1)))
            cursor2.execute("""UPDATE """ + table + """
                SET expire = %s WHERE oid = %s
                """, (expire1, oid))
        if init_expire0 != init_expire1:
            print(("%s %s.%s.%s Init_Expire0: %s Init_Expire1: %s"
                   ) % (ugc, phenomena, significance, eventid,
                        p(init_expire0), p(init_expire1)))
            cursor2.execute("""
                UPDATE """ + table + """ SET init_expire = %s
                WHERE oid = %s
                """, (init_expire1, oid))
def process(order):
    """ Process this timestamp

    Reingest NWSTG*tar.Z archives of TEXT_* members into the AFOS
    database, splitting each member into bulletins on its ttaaii/source
    header and deleting prior copies keyed on (pil, entered, source).

    Args:
        order: unused here; the glob below drives the work.
    """
    cursor = PGCONN.cursor()
    for tarfn in glob.glob("NWSTG*tar.Z"):
        subprocess.call("uncompress %s" % (tarfn, ), shell=True)
        # Archive filename carries a YYYYMMDD stamp at chars 6:14
        ts = datetime.datetime.strptime(tarfn[6:14], '%Y%m%d')
        ts = ts.replace(hour=23, minute=59, tzinfo=pytz.utc)
        tar = tarfile.open(tarfn[:-2], 'r')
        # Track keys already deleted so repeats in the same archive do
        # not delete the rows we just inserted
        memory = []
        for member in tar.getmembers():
            fobj = tar.extractfile(member)
            if not fobj.name.startswith("TEXT_"):
                continue
            content = (re.sub(BAD_CHARS, "",
                              fobj.read())).replace("\r\r", "")
            # Member names look like TEXT_<ttaaii>_<source>_...
            parts = fobj.name.strip().split("_")
            ttaaii = parts[1]
            source = parts[2]
            delimiter = "%s %s" % (ttaaii, source)
            # Filter content back to the start of the ttaaii
            pos = content.find(delimiter)
            if pos == -1:
                print(('Skipping can not find %s in product %s'
                       ) % (delimiter, fobj.name))
                continue
            content = content[pos:]
            # Now we are getting closer, lets split by the delimter as we
            # may have multiple products in one file!
            for bulletin in content.split(delimiter):
                if len(bulletin) == 0:
                    continue
                # Restore the header the split consumed
                bulletin = "000\n%s%s" % (delimiter, bulletin)
                bulletin = noaaport_text(bulletin)
                try:
                    prod = TextProduct(bulletin, utcnow=ts,
                                       parse_segments=False)
                except Exception as exp:
                    print('Parsing Failure %s\n%s' % (fobj.name, exp))
                    continue
                if prod.valid.year != ts.year:
                    print('Invalid timestamp, year mismatch')
                    continue
                # Half-year partition tables (Jan-Jun / Jul-Dec)
                table = "products_%s_%s" % (prod.valid.year,
                                            ("0712"
                                             if prod.valid.month > 6
                                             else "0106"))
                key = "%s_%s_%s" % (
                    prod.afos, prod.valid.strftime("%Y%m%d%H%M"),
                    prod.source)
                if key not in memory:
                    cursor.execute(
                        """
                        DELETE from """ + table + """ WHERE pil = %s
                        and entered = %s and source = %s
                        """,
                        (prod.afos, prod.valid, prod.source))
                    memory.append(key)
                # NOTE(review): insert uses the filename-derived source
                # and ttaaii, not prod.source/prod.wmo — confirm this
                # is intentional.
                cursor.execute(
                    """INSERT into """ + table + """
                    (data, pil, entered, source, wmo)
                    values (%s,%s,%s,%s,%s)
                    """,
                    (bulletin, prod.afos, prod.valid, source, ttaaii))
        subprocess.call("compress %s" % (tarfn[:-2], ), shell=True)
    cursor.close()
    PGCONN.commit()
def test_noaaport_text_no_ldm_sequence():
    """A non-numeric first line should yield the 000 default sequence."""
    raw = "AAAAAA\r\r\n" + util.get_test_file("WCN.txt")
    processed = util.noaaport_text(raw)
    expected_prefix = "\001\r\r\n000 \r\r\n"
    assert processed[:11] == expected_prefix
def test_noaaport_text_cruft_at_top():
    """Leading empty/cruft lines should be stripped before framing."""
    raw = "\r\r\r\n\r\n\r\r\r\r\r\n" + util.get_test_file("WCN.txt")
    processed = util.noaaport_text(raw)
    expected_prefix = "\001\r\r\n098 \r\r\n"
    assert processed[:11] == expected_prefix