Python dedupeの例、avwx.parsing.core.dedupe Pythonの例

コード例 #1

0

ファイルを表示

 def test_dedupe(self):
     """Check ``core.dedupe`` in its default and ``only_neighbors`` modes.

     Each case is an ``(input, expected)`` pair. The first table exercises
     the default call; the second passes ``only_neighbors=True``.
     """
     default_cases = [
         ([1, 2, 3, 2, 1], [1, 2, 3]),
         ([4, 4, 4, 4], [4]),
         ([1, 5, 1, 1, 3, 5], [1, 5, 3]),
     ]
     neighbor_cases = [
         ([1, 2, 3, 2, 1], [1, 2, 3, 2, 1]),
         ([4, 4, 4, 4], [4]),
         ([1, 5, 1, 1, 3, 5], [1, 5, 1, 3, 5]),
     ]
     for source, expected in default_cases:
         self.assertEqual(core.dedupe(source), expected)
     for source, expected in neighbor_cases:
         self.assertEqual(
             core.dedupe(source, only_neighbors=True), expected)

コード例 #2

0

ファイルを表示

def parse_in_line(line: str, units: Units) -> {str: str}:
    """
    Parser for the International TAF forecast variant

    The line is split on whitespace, de-duplicated and sanitized, then each
    helper in turn consumes the elements it recognises and hands back the
    remaining list. The extracted values are collected in the returned dict.
    """
    tokens = core.sanitize_report_list(core.dedupe(line.split()))
    parsed = {"sanitized": " ".join(tokens)}

    tokens, line_type, start, end = get_type_and_times(tokens)
    parsed["type"] = line_type
    parsed["start_time"] = start
    parsed["end_time"] = end

    tokens, parsed["wind_shear"] = get_wind_shear(tokens)

    # The fifth value returned by get_wind is not stored for TAF lines
    tokens, direction, speed, gust, _ = core.get_wind(tokens, units)
    parsed["wind_direction"] = direction
    parsed["wind_speed"] = speed
    parsed["wind_gust"] = gust

    if "CAVOK" not in tokens:
        tokens, parsed["visibility"] = core.get_visibility(tokens, units)
        tokens, parsed["clouds"] = core.get_clouds(tokens)
    else:
        # CAVOK stands in for both the visibility and the cloud elements
        parsed["visibility"] = core.make_number("CAVOK")
        parsed["clouds"] = []
        tokens.remove("CAVOK")

    other, altimeter, icing, turbulence = get_alt_ice_turb(tokens)
    parsed["other"] = other
    parsed["altimeter"] = altimeter
    parsed["icing"] = icing
    parsed["turbulence"] = turbulence
    return parsed

コード例 #3

0

ファイルを表示

def parse_na_line(line: str, units: Units) -> {str: str}:
    """
    Parser for the North American TAF forecast variant

    The line is split on whitespace, de-duplicated and sanitized, then each
    helper in turn consumes the elements it recognises and hands back the
    remaining list. The extracted values are collected in the returned dict.
    """
    tokens = core.sanitize_report_list(core.dedupe(line.split()))
    parsed = {"sanitized": " ".join(tokens)}

    tokens, line_type, start, end = get_type_and_times(tokens)
    parsed["type"] = line_type
    parsed["start_time"] = start
    parsed["end_time"] = end

    tokens, parsed["wind_shear"] = get_wind_shear(tokens)

    # The fifth value returned by get_wind is not stored for TAF lines
    tokens, direction, speed, gust, _ = core.get_wind(tokens, units)
    parsed["wind_direction"] = direction
    parsed["wind_speed"] = speed
    parsed["wind_gust"] = gust

    tokens, parsed["visibility"] = core.get_visibility(tokens, units)
    tokens, parsed["clouds"] = core.get_clouds(tokens)

    other, altimeter, icing, turbulence = get_alt_ice_turb(tokens)
    parsed["other"] = other
    parsed["altimeter"] = altimeter
    parsed["icing"] = icing
    parsed["turbulence"] = turbulence
    return parsed

コード例 #4

0

ファイルを表示

ファイル: scrape.py プロジェクト: china1885/avwx-engine

 def _clean_report(self, report: str) -> str:
     """Collapse each run of whitespace into a single space, if enabled.

     When ``self._strip_whitespace`` is falsy the report is returned
     untouched. A list input is normalised element by element and the
     results are passed through ``dedupe``; any other input is treated as
     a single string.
     """
     if self._strip_whitespace:
         if isinstance(report, list):
             return dedupe(" ".join(entry.split()) for entry in report)
         return " ".join(report.split())
     return report

コード例 #5

0

ファイルを表示

def parse_na(report: str) -> (MetarData, Units):
    """
    Parser for the North American METAR variant

    The report string is sanitized, its remarks are separated, and the
    remaining elements are de-duplicated and sanitized. Each helper then
    consumes the elements it recognises and hands back the rest.

    Returns a ``MetarData`` built from the collected fields together with
    the ``Units`` instance created from ``NA_UNITS``.
    """
    units = Units(**NA_UNITS)
    resp = {"raw": report}

    sanitized_text = core.sanitize_report_string(report)
    data, resp["remarks"] = get_remarks(sanitized_text)
    data = core.sanitize_report_list(core.dedupe(data))
    resp["sanitized"] = " ".join(data + [resp["remarks"]])

    data, resp["station"], resp["time"] = core.get_station_and_time(data)
    data, resp["runway_visibility"] = get_runway_visibility(data)
    data, resp["clouds"] = core.get_clouds(data)

    data, direction, speed, gust, variable = core.get_wind(data, units)
    resp["wind_direction"] = direction
    resp["wind_speed"] = speed
    resp["wind_gust"] = gust
    resp["wind_variable_direction"] = variable

    data, resp["altimeter"] = get_altimeter(data, units, "NA")
    data, resp["visibility"] = core.get_visibility(data, units)
    data, resp["temperature"], resp["dewpoint"] = get_temp_and_dew(data)

    # Flight rules are derived from the parsed visibility and cloud ceiling
    ceiling = core.get_ceiling(resp["clouds"])
    condition = core.get_flight_rules(resp["visibility"], ceiling)

    # Whatever is left over is split into wx codes and unparsed elements
    resp["other"], resp["wx_codes"] = get_wx_codes(data)
    resp["flight_rules"] = FLIGHT_RULES[condition]
    resp["remarks_info"] = remarks.parse(resp["remarks"])
    resp["time"] = core.make_timestamp(resp["time"])
    return MetarData(**resp), units

コード例 #6

0

ファイルを表示

ファイル: taf.py プロジェクト: mralext20/avwx-engine

def parse_in_line(line: str, units: Units) -> Dict[str, str]:
    """Parser for the International TAF forecast variant

    The line is split on whitespace, de-duplicated and sanitized (with
    ``remove_clr_and_skc=False``), then each helper in turn consumes the
    elements it recognises and hands back the remaining list.
    """
    tokens = sanitization.sanitize_report_list(
        core.dedupe(line.split()), remove_clr_and_skc=False)
    parsed = {"sanitized": " ".join(tokens)}

    tokens, line_type, start, end, transition = get_type_and_times(tokens)
    parsed["type"] = line_type
    parsed["start_time"] = start
    parsed["end_time"] = end
    parsed["transition_start"] = transition

    tokens, parsed["wind_shear"] = get_wind_shear(tokens)

    # The fifth value returned by get_wind is not stored for TAF lines
    tokens, direction, speed, gust, _ = core.get_wind(tokens, units)
    parsed["wind_direction"] = direction
    parsed["wind_speed"] = speed
    parsed["wind_gust"] = gust

    if "CAVOK" not in tokens:
        tokens, parsed["visibility"] = core.get_visibility(tokens, units)
        tokens, parsed["clouds"] = core.get_clouds(tokens)
    else:
        # CAVOK stands in for both the visibility and the cloud elements
        parsed["visibility"] = core.make_number("CAVOK")
        parsed["clouds"] = []
        tokens.remove("CAVOK")

    other, altimeter, icing, turbulence = get_alt_ice_turb(tokens)
    parsed["other"] = other
    parsed["altimeter"] = altimeter
    parsed["icing"] = icing
    parsed["turbulence"] = turbulence
    return parsed

コード例 #7

0

ファイルを表示

ファイル: metar.py プロジェクト: mralext20/avwx-engine

def sanitize(report: str) -> Tuple[str, str, List[str]]:
    """Returns a sanitized report, remarks, and elements ready for parsing

    The sanitized report is the cleaned elements joined by single spaces,
    followed by the remarks string when that string is non-empty.
    """
    body, remark_str = get_remarks(
        sanitization.sanitize_report_string(report))
    data = sanitization.sanitize_report_list(core.dedupe(body))
    pieces = [" ".join(data)]
    if remark_str:
        pieces.append(remark_str)
    return " ".join(pieces), remark_str, data

コード例 #8

0

ファイルを表示

ファイル: sanitization.py プロジェクト: seanodea/avwx-engine

def sanitize_report_list(wxdata: [str],
                         remove_clr_and_skc: bool = True) -> [str]:
    """
    Sanitize wxData

    We can remove and identify "one-off" elements and fix other issues before
    parsing a line.

    The elements are visited from last to first so that removing or inserting
    an element never shifts the index of an element still to be visited.

    Args:
        wxdata: Report elements. The list is modified in place (elements are
            popped, rewritten and inserted) while it is being processed.
        remove_clr_and_skc: When true, "CLR" and "SKC" elements are removed.

    Returns:
        The result of ``dedupe(wxdata, only_neighbors=True)`` applied to the
        processed list.
    """
    # Iterate over a snapshot: `item` is the value the element had before
    # this loop started modifying the list.
    for i, item in reversed(list(enumerate(wxdata))):
        ilen = len(item)
        # Remove elements containing only '/'
        if is_unknown(item):
            wxdata.pop(i)
            continue
        # Remove empty wind /////KT
        if item.endswith("KT") and is_unknown(item[:-2]):
            wxdata.pop(i)
            continue
        # Every branch below that pops index i also `continue`s: once the
        # element is gone, the later fix-ups would write to whatever element
        # now occupies index i (or past the end of the list).
        # Remove RE from wx codes, REVCTS -> VCTS
        if ilen in [4, 6] and item.startswith("RE"):
            wxdata[i] = item[2:]
        # Fix a slew of easily identifiable conditions where a space does not belong
        elif i and extra_space_exists(wxdata[i - 1], item):
            wxdata[i - 1] += wxdata.pop(i)
            continue
        # Remove spurious elements
        elif item in ITEM_REMV:
            wxdata.pop(i)
            continue
        # Remove 'Sky Clear' from METAR but not TAF
        elif remove_clr_and_skc and item in ["CLR", "SKC"]:
            wxdata.pop(i)
            continue
        # Replace certain items
        elif item in ITEM_REPL:
            wxdata[i] = ITEM_REPL[item]
        # Remove amend signifier from start of report ('CCA', 'CCB',etc)
        elif ilen == 3 and item.startswith("CC") and item[2].isalpha():
            wxdata.pop(i)
            continue
        # Fix inconsistent 'P6SM' Ex: TP6SM or 6PSM -> P6SM
        elif ilen > 3 and item[-4:] in VIS_PERMUTATIONS:
            wxdata[i] = "P6SM"
        # Fix misplaced KT 22022KTG40
        elif ilen == 10 and "KTG" in item and item[:5].isdigit():
            wxdata[i] = item.replace("KTG", "G") + "KT"
        # NOTE: the `if` below starts a second, independent chain, so these
        # wind/temperature fix-ups are evaluated even when a rewrite above
        # already matched.
        # Fix backwards KT Ex: 06012G22TK
        if (ilen >= 7 and (item[:3].isdigit() or item[:3] == "VRB")
                and item.endswith("TK")):
            wxdata[i] = item[:-2] + "KT"
        # Fix gust double G Ex: 360G17G32KT
        elif ilen > 10 and item.endswith("KT") and item[3] == "G":
            wxdata[i] = item[:3] + item[4:]
        # Fix leading character mistypes in wind
        elif (ilen > 7 and not item[0].isdigit() and not item.startswith("VRB")
              and item.endswith("KT") and not item.startswith("WS")):
            while not item[0].isdigit() and not item.startswith("VRB"):
                item = item[1:]
            wxdata[i] = item
        # Fix non-G gust Ex: 14010-15KT
        elif ilen == 10 and item.endswith("KT") and item[5] != "G":
            wxdata[i] = item[:5] + "G" + item[6:]
        # Fix leading digits on VRB wind Ex: 2VRB02KT
        elif (ilen > 7 and item.endswith("KT") and "VRB" in item
              and item[0].isdigit() and "Z" not in item):
            while item[0].isdigit():
                item = item[1:]
            wxdata[i] = item
        # Fix wind T: a wind element ending in a lone 'K' or 'T' gets 'KT'.
        # For the six-character VRB form the digits follow the 'VRB' prefix,
        # so they are checked at item[3:5].
        elif not item.endswith("KT") and (
            (ilen == 6 and item[5] in ["K", "T"] and
             (item[:5].isdigit() or
              (item.startswith("VRB") and item[3:5].isdigit()))) or
            (ilen == 9 and item[8] in ["K", "T"] and item[5] == "G" and
             (item[:5].isdigit() or item.startswith("VRB")))):
            wxdata[i] = item[:-1] + "KT"
        # Fix joined TX-TN: split at the start of the second group. The
        # second group stays at index i and the first group is inserted
        # after it.
        elif ilen > 16 and len(item.split("/")) == 3:
            if item.startswith("TX") and "TN" in item:
                tn_index = item.find("TN")
                wxdata.insert(i + 1, item[:tn_index])
                wxdata[i] = item[tn_index:]
            elif item.startswith("TN") and "TX" in item:
                tx_index = item.find("TX")
                wxdata.insert(i + 1, item[:tx_index])
                wxdata[i] = item[tx_index:]
        # Fix situations where a space is missing
        sep = extra_space_needed(item)
        if sep:
            wxdata.insert(i + 1, item[sep:])
            wxdata[i] = item[:sep]
    wxdata = dedupe(wxdata, only_neighbors=True)
    return wxdata