def test_dedupe(self):
    """Checks core.dedupe in its default mode and in its neighbors-only mode"""
    # Default mode: a value that was already seen anywhere earlier is dropped
    global_cases = (
        ([1, 2, 3, 2, 1], [1, 2, 3]),
        ([4, 4, 4, 4], [4]),
        ([1, 5, 1, 1, 3, 5], [1, 5, 3]),
    )
    for source, expected in global_cases:
        self.assertEqual(core.dedupe(source), expected)

    # Neighbors-only mode: only back-to-back repeats are collapsed
    neighbor_cases = (
        ([1, 2, 3, 2, 1], [1, 2, 3, 2, 1]),
        ([4, 4, 4, 4], [4]),
        ([1, 5, 1, 1, 3, 5], [1, 5, 1, 3, 5]),
    )
    for source, expected in neighbor_cases:
        self.assertEqual(core.dedupe(source, only_neighbors=True), expected)
def parse_in_line(line: str, units: Units) -> {str: str}:
    """
    Parser for the International TAF forecast variant

    Splits the line on whitespace, cleans the elements, then hands the
    remaining elements from one extractor to the next, storing what each
    extractor returns under its own key of the result dict.
    """
    tokens = core.sanitize_report_list(core.dedupe(line.split()))
    parsed = {"sanitized": " ".join(tokens)}

    tokens, line_type, start, end = get_type_and_times(tokens)
    parsed.update(type=line_type, start_time=start, end_time=end)

    tokens, parsed["wind_shear"] = get_wind_shear(tokens)

    # The fifth value returned by get_wind is not kept for TAF lines
    tokens, direction, speed, gust, _unused = core.get_wind(tokens, units)
    parsed.update(wind_direction=direction, wind_speed=speed, wind_gust=gust)

    if "CAVOK" not in tokens:
        tokens, parsed["visibility"] = core.get_visibility(tokens, units)
        tokens, parsed["clouds"] = core.get_clouds(tokens)
    else:
        # CAVOK stands in for both the visibility and the cloud elements
        parsed["visibility"] = core.make_number("CAVOK")
        parsed["clouds"] = []
        tokens.remove("CAVOK")

    other, altimeter, icing, turbulence = get_alt_ice_turb(tokens)
    parsed.update(
        other=other, altimeter=altimeter, icing=icing, turbulence=turbulence
    )
    return parsed
def parse_na_line(line: str, units: Units) -> {str: str}:
    """
    Parser for the North American TAF forecast variant

    Splits the line on whitespace, cleans the elements, then hands the
    remaining elements from one extractor to the next, storing what each
    extractor returns under its own key of the result dict.
    """
    tokens = core.sanitize_report_list(core.dedupe(line.split()))
    parsed = {"sanitized": " ".join(tokens)}

    tokens, line_type, start, end = get_type_and_times(tokens)
    parsed.update(type=line_type, start_time=start, end_time=end)

    tokens, parsed["wind_shear"] = get_wind_shear(tokens)

    # The fifth value returned by get_wind is not kept for TAF lines
    tokens, direction, speed, gust, _unused = core.get_wind(tokens, units)
    parsed.update(wind_direction=direction, wind_speed=speed, wind_gust=gust)

    tokens, parsed["visibility"] = core.get_visibility(tokens, units)
    tokens, parsed["clouds"] = core.get_clouds(tokens)

    other, altimeter, icing, turbulence = get_alt_ice_turb(tokens)
    parsed.update(
        other=other, altimeter=altimeter, icing=icing, turbulence=turbulence
    )
    return parsed
def _clean_report(self, report: str) -> str:
    """Collapses each run of whitespace into one space when stripping is enabled

    A list input has every entry collapsed the same way and is then passed
    through dedupe. With stripping disabled the input is returned untouched.
    """
    if self._strip_whitespace:
        if isinstance(report, list):
            return dedupe(" ".join(entry.split()) for entry in report)
        return " ".join(report.split())
    return report
def parse_na(report: str) -> (MetarData, Units):
    """
    Parser for the North American METAR variant

    Returns the populated MetarData together with the Units built from
    NA_UNITS.
    """
    units = Units(**NA_UNITS)

    cleaned = core.sanitize_report_string(report)
    tokens, remark_text = get_remarks(cleaned)
    tokens = core.sanitize_report_list(core.dedupe(tokens))
    fields = {
        "raw": report,
        "remarks": remark_text,
        # The sanitized text is the cleaned elements followed by the remarks
        "sanitized": " ".join(tokens + [remark_text]),
    }

    # Each extractor consumes its elements and returns what is left over
    tokens, station, observed = core.get_station_and_time(tokens)
    fields.update(station=station, time=observed)
    tokens, fields["runway_visibility"] = get_runway_visibility(tokens)
    tokens, fields["clouds"] = core.get_clouds(tokens)

    tokens, direction, speed, gust, variable = core.get_wind(tokens, units)
    fields.update(
        wind_direction=direction,
        wind_speed=speed,
        wind_gust=gust,
        wind_variable_direction=variable,
    )

    tokens, fields["altimeter"] = get_altimeter(tokens, units, "NA")
    tokens, fields["visibility"] = core.get_visibility(tokens, units)
    tokens, temperature, dewpoint = get_temp_and_dew(tokens)
    fields.update(temperature=temperature, dewpoint=dewpoint)

    ceiling = core.get_ceiling(fields["clouds"])
    condition = core.get_flight_rules(fields["visibility"], ceiling)

    fields["other"], fields["wx_codes"] = get_wx_codes(tokens)
    fields["flight_rules"] = FLIGHT_RULES[condition]
    fields["remarks_info"] = remarks.parse(fields["remarks"])
    # Replace the raw time element with its timestamp form
    fields["time"] = core.make_timestamp(fields["time"])
    return MetarData(**fields), units
def parse_in_line(line: str, units: Units) -> Dict[str, str]:
    """Parser for the International TAF forecast variant

    Splits the line on whitespace, cleans the elements (keeping CLR and SKC),
    then hands the remaining elements from one extractor to the next, storing
    what each extractor returns under its own key of the result dict.
    """
    tokens = core.dedupe(line.split())
    tokens = sanitization.sanitize_report_list(tokens, remove_clr_and_skc=False)
    parsed = {"sanitized": " ".join(tokens)}

    tokens, line_type, start, end, transition = get_type_and_times(tokens)
    parsed.update(
        type=line_type,
        start_time=start,
        end_time=end,
        transition_start=transition,
    )

    tokens, parsed["wind_shear"] = get_wind_shear(tokens)

    # The fifth value returned by get_wind is not kept for TAF lines
    tokens, direction, speed, gust, _unused = core.get_wind(tokens, units)
    parsed.update(wind_direction=direction, wind_speed=speed, wind_gust=gust)

    if "CAVOK" not in tokens:
        tokens, parsed["visibility"] = core.get_visibility(tokens, units)
        tokens, parsed["clouds"] = core.get_clouds(tokens)
    else:
        # CAVOK stands in for both the visibility and the cloud elements
        parsed["visibility"] = core.make_number("CAVOK")
        parsed["clouds"] = []
        tokens.remove("CAVOK")

    other, altimeter, icing, turbulence = get_alt_ice_turb(tokens)
    parsed.update(
        other=other, altimeter=altimeter, icing=icing, turbulence=turbulence
    )
    return parsed
def sanitize(report: str) -> Tuple[str, str, List[str]]:
    """Returns a sanitized report, remarks, and elements ready for parsing"""
    cleaned = sanitization.sanitize_report_string(report)
    tokens, remark_str = get_remarks(cleaned)
    tokens = sanitization.sanitize_report_list(core.dedupe(tokens))

    # The remarks are appended to the sanitized text only when there are any
    pieces = [" ".join(tokens)]
    if remark_str:
        pieces.append(remark_str)
    return " ".join(pieces), remark_str, tokens
def sanitize_report_list(wxdata: [str], remove_clr_and_skc: bool = True) -> [str]:
    """
    Sanitize wxData

    We can remove and identify "one-off" elements and fix other issues before parsing a line

    The elements are visited from last to first over a snapshot of the list, so
    removing, merging, or splitting an element only shifts positions that have
    already been visited. ``wxdata`` itself is modified in place; the value
    returned is the result of a final ``dedupe(..., only_neighbors=True)`` pass.

    Args:
        wxdata: Report elements to clean (mutated by this call)
        remove_clr_and_skc: When true, the "CLR" and "SKC" elements are removed

    Returns:
        The cleaned elements after the neighbor-only dedupe pass
    """
    for i, item in reversed(list(enumerate(wxdata))):
        ilen = len(item)
        # Remove elements containing only '/'
        if is_unknown(item):
            wxdata.pop(i)
            continue
        # Remove empty wind /////KT
        if item.endswith("KT") and is_unknown(item[:-2]):
            wxdata.pop(i)
            continue
        # Remove RE from wx codes, REVCTS -> VCTS
        if ilen in [4, 6] and item.startswith("RE"):
            wxdata[i] = item[2:]
        # Fix a slew of easily identifiable conditions where a space does not belong
        elif i and extra_space_exists(wxdata[i - 1], item):
            wxdata[i - 1] += wxdata.pop(i)
        # Remove spurious elements
        elif item in ITEM_REMV:
            wxdata.pop(i)
        # Remove 'Sky Clear' from METAR but not TAF
        elif remove_clr_and_skc and item in ["CLR", "SKC"]:
            wxdata.pop(i)
        # Replace certain items
        elif item in ITEM_REPL:
            wxdata[i] = ITEM_REPL[item]
        # Remove amend signifier from start of report ('CCA', 'CCB',etc)
        elif ilen == 3 and item.startswith("CC") and item[2].isalpha():
            wxdata.pop(i)
        # Fix inconsistent 'P6SM' Ex: TP6SM or 6PSM -> P6SM
        elif ilen > 3 and item[-4:] in VIS_PERMUTATIONS:
            wxdata[i] = "P6SM"
        # Fix misplaced KT 22022KTG40
        elif ilen == 10 and "KTG" in item and item[:5].isdigit():
            wxdata[i] = item.replace("KTG", "G") + "KT"
        # NOTE(review): this second chain starts with a plain `if`, so it also
        # runs after the chain above has removed or merged the element at i,
        # still using the snapshot value of `item` - confirm this is intended.
        # Fix backwards KT Ex: 06012G22TK
        if (ilen >= 7
                and (item[:3].isdigit() or item[:3] == "VRB")
                and item.endswith("TK")):
            wxdata[i] = item[:-2] + "KT"
        # Fix gust double G Ex: 360G17G32KT
        elif ilen > 10 and item.endswith("KT") and item[3] == "G":
            wxdata[i] = item[:3] + item[4:]
        # Fix leading character mistypes in wind
        elif (ilen > 7
              and not item[0].isdigit()
              and not item.startswith("VRB")
              and item.endswith("KT")
              and not item.startswith("WS")):
            # Strip on a copy and stop at the end of the string: an element with
            # neither a digit nor "VRB" would otherwise be indexed while empty
            trimmed = item
            while (trimmed
                   and not trimmed[0].isdigit()
                   and not trimmed.startswith("VRB")):
                trimmed = trimmed[1:]
            # Nothing usable left means there was no wind to recover; keep as-is
            if trimmed:
                item = trimmed
                wxdata[i] = item
        # Fix non-G gust Ex: 14010-15KT
        elif ilen == 10 and item.endswith("KT") and item[5] != "G":
            wxdata[i] = item[:5] + "G" + item[6:]
        # Fix leading digits on VRB wind Ex: 2VRB02KT
        elif (ilen > 7
              and item.endswith("KT")
              and "VRB" in item
              and item[0].isdigit()
              and "Z" not in item):
            while item[0].isdigit():
                item = item[1:]
            wxdata[i] = item
        # Fix wind T
        elif not item.endswith("KT") and (
                (ilen == 6
                 and item[5] in ["K", "T"]
                 # For VRB winds the digits follow the prefix, so check [3:5];
                 # testing [:3] could never pass alongside startswith("VRB")
                 and (item[:5].isdigit()
                      or (item.startswith("VRB") and item[3:5].isdigit())))
                or (ilen == 9
                    and item[8] in ["K", "T"]
                    and item[5] == "G"
                    and (item[:5].isdigit() or item.startswith("VRB")))):
            wxdata[i] = item[:-1] + "KT"
        # Fix joined TX-TN
        elif ilen > 16 and len(item.split("/")) == 3:
            # Only split when the second prefix is actually present; find()
            # returns -1 otherwise, which would slice off the last character
            if item.startswith("TX") and "TN" in item:
                tn_index = item.find("TN")
                wxdata.insert(i + 1, item[:tn_index])
                wxdata[i] = item[tn_index:]
            elif item.startswith("TN") and "TX" in item:
                tx_index = item.find("TX")
                wxdata.insert(i + 1, item[:tx_index])
                wxdata[i] = item[tx_index:]
        # Fix situations where a space is missing
        sep = extra_space_needed(item)
        if sep:
            wxdata.insert(i + 1, item[sep:])
            wxdata[i] = item[:sep]
    wxdata = dedupe(wxdata, only_neighbors=True)
    return wxdata