def ee_parse_sigpoints2():
    sigs=[]
    parser=lxml.html.HTMLParser()
    airac_date=get_airac_date()
    url="/%s/html/eAIP/EE-ENR-4.4-en-GB.html#ENR-4.4"%(airac_date,)
    data,date=fetchdata.getdata(url,country='ee')    
    parser.feed(data)
    tree=parser.close()
    for tab in tree.xpath(".//table"):        
        for idx,cand in enumerate(tab.xpath(".//tr")):
            if len(cand.getchildren())!=4:
                continue
            if idx==0: continue            
            sig=dict()
            name,coord,ats,remark=cand.getchildren()            
            nametxt=alltext(name).strip()
            coordtxt=alltext(coord).strip()
            
            if idx==1:
                assert nametxt=='1' and coordtxt=='2' 
                continue
            print "Name:",nametxt
            print"coord:",coordtxt
            sig['url']=url
            sig['date']=date
            sig['name']=nametxt            
            sig['short']=nametxt
            sig['kind']='sig. point'        
            subed=re.sub(ur"[\n\s]+"," ",coordtxt)
            sig['pos']=mapper.anyparse(subed)
            sigs.append(sig)
    return sigs
def ee_parse_sigpoints2():
    """Scrape significant points from the Estonian eAIP ENR 4.4 page.

    Returns a list of dicts with keys url, date, name, short, kind and
    pos (parsed coordinate), one per significant-point table row.
    """
    sigs = []
    parser = lxml.html.HTMLParser()
    airac_date = get_airac_date()
    url = "/%s/html/eAIP/EE-ENR-4.4-en-GB.html#ENR-4.4" % (airac_date, )
    data, date = fetchdata.getdata(url, country='ee')
    parser.feed(data)
    tree = parser.close()
    for tab in tree.xpath(".//table"):
        for idx, cand in enumerate(tab.xpath(".//tr")):
            # Only data rows have exactly four cells (name/coord/ats/remark).
            if len(cand.getchildren()) != 4:
                continue
            if idx == 0: continue  # title header row
            sig = dict()
            name, coord, ats, remark = cand.getchildren()
            nametxt = alltext(name).strip()
            coordtxt = alltext(coord).strip()

            if idx == 1:
                # Second row is the column-numbering header ("1", "2", ...).
                assert nametxt == '1' and coordtxt == '2'
                continue
            print "Name:", nametxt
            print "coord:", coordtxt
            sig['url'] = url
            sig['date'] = date
            sig['name'] = nametxt
            sig['short'] = nametxt
            sig['kind'] = 'sig. point'
            # Collapse internal whitespace before coordinate parsing.
            subed = re.sub(ur"[\n\s]+", " ", coordtxt)
            sig['pos'] = mapper.anyparse(subed)
            sigs.append(sig)
    return sigs
Example #3
0
def get_cur_airac():
    data, date = fetchdata.getdata("/aiseaip", country='ev')
    parser = lxml.html.HTMLParser()
    parser.feed(data)
    tree = parser.close()
    for div in tree.xpath(".//div"):
        at = alltext(div)
        if at.count("eAIP"):
            print "Matching:", at
            m = re.match(
                r".*CURRENTLY EFFECTIVE eAIP:[\n\s]*(\d{1,2}.[A-Z]+.\d{4}).AIRAC.*",
                alltext(div), re.DOTALL)
            if m:
                return m.groups()[0]
    return None
Example #4
0
def get_airac_date():
    urlbase = "/index.php?option=com_content&view=article&id=129&Itemid=2&lang=en"
    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(urlbase, country='ee_base')
    parser.feed(data)
    tree = parser.close()
    for x in list(tree.xpath(".//p")) + list(tree.xpath(".//li")):
        txt = alltext(x)
        print "par", txt

        m = re.match(
            ur".*Current\s*eAIP\s*with\s*effective\s*date\s*(\d+)\s*([A-Z]+)\s*(\d+).*",
            txt, re.UNICODE)
        if m:
            day, months, year = m.groups()
            monthi = dict(JAN=1,
                          FEB=2,
                          MAR=3,
                          APR=4,
                          MAY=5,
                          JUN=6,
                          JUL=7,
                          AUG=8,
                          SEP=9,
                          OCT=10,
                          NOV=11,
                          DEC=12)[months]
            return "%04d-%02d-%02d" % (int(year), int(monthi), int(day))
    raise Exception("No airac date")
Example #5
0
def ev_parse_obst():
    """Scrape the obstacle list from the Latvian eAIP ENR 5.4 page.

    Returns a list of dicts with keys name, pos, height, elev,
    lighting and kind — one per five-column obstacle table row.
    """
    cur_airac = get_cur_airac()
    url = "/eAIPfiles/%s-AIRAC/html/eAIP/EV-ENR-5.4-en-GB.html" % (cur_airac,)
    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(url, country="ev")
    parser.feed(data)
    tree = parser.close()
    res = []
    for table in tree.xpath("//table"):
        for row in table.xpath(".//tr"):
            tds = row.xpath(".//td")
            # Obstacle rows have exactly five cells; skip headers etc.
            if len(tds) != 5:
                continue
            # Renamed the original "type" local to avoid shadowing the
            # builtin; also dropped the unused got_fir flag.
            name, obst_kind, coord, elev, light = [alltext(x) for x in tds]
            # The elevation cell holds "elevation/height above ground".
            elev, height = elev.split("/")
            res.append(
                dict(
                    name=name,
                    pos=mapper.parsecoord(coord),
                    height=mapper.parse_elev(height.strip()),
                    elev=mapper.parse_elev(elev),
                    lighting=light,
                    kind=obst_kind,
                )
            )
    return res
Example #6
0
def get_airac_date():
    """Return the current Estonian eAIP effective date as "YYYY-MM-DD".

    Scrapes the AIS landing page; raises Exception when no paragraph or
    list item announces the effective date.
    """
    urlbase="/index.php?option=com_content&view=article&id=129&Itemid=2&lang=en"
    parser=lxml.html.HTMLParser()
    data,date=fetchdata.getdata(urlbase,country='ee_base')
    parser.feed(data)
    tree=parser.close()
    for x in list(tree.xpath(".//p"))+list(tree.xpath(".//li")):
        txt=alltext(x)
        print "par",txt

        m=re.match(ur".*Current\s*eAIP\s*with\s*effective\s*date\s*(\d+)\s*([A-Z]+)\s*(\d+).*",txt,re.UNICODE)
        if m:
            day,months,year=m.groups()
            # Map the English three-letter month abbreviation to its number.
            monthi=dict(JAN=1,
                        FEB=2,
                        MAR=3,
                        APR=4,
                        MAY=5,
                        JUN=6,
                        JUL=7,
                        AUG=8,
                        SEP=9,
                        OCT=10,
                        NOV=11,
                        DEC=12)[months]
            return "%04d-%02d-%02d"%(int(year),int(monthi),int(day))
    raise Exception("No airac date")
Example #7
0
def get_cur_airac():
    """Return the currently-effective AIRAC identifier scraped from the
    Latvian AIS "/aiseaip" page, or None if no matching div is found."""
    data,date=fetchdata.getdata("/aiseaip",country='ev')
    parser=lxml.html.HTMLParser()
    parser.feed(data)
    tree=parser.close()
    for div in tree.xpath(".//div"):
        at=alltext(div)
        if at.count("eAIP"):
            print "Matching:",at
            # Capture the date-like token before "AIRAC".
            m=re.match(r".*CURRENTLY EFFECTIVE eAIP:[\n\s]*(\d{1,2}.[A-Z]+.\d{4}).AIRAC.*",alltext(div),re.DOTALL)
            if m:
                return m.groups()[0]
    return None
def ev_parse_sigpoints():
    out=[]
    parser=lxml.html.HTMLParser()
    airac=get_cur_airac()
    url="/eAIPfiles/%s-AIRAC/html/eAIP/EV-ENR-4.4-en-GB.html"%(airac)
    data,date=fetchdata.getdata(url,country='ev')
    parser.feed(data)
    tree=parser.close()
    for table in tree.xpath("//table"):
        #print "Table with %d children"%(len(table.getchildren()),)
        rows=list(table.xpath(".//tr"))
        for row in rows:
            hdr=list(row.xpath(".//th"))
            if hdr: continue
            cols=list(row.xpath(".//td"))
            pos=mapper.parsecoord(alltext(cols[1]))
            nameraw=alltext(cols[0])
            print "raw:",repr(nameraw)
            name,=re.match(ur"\s*(\w{5})\s*",nameraw).groups()

            out.append(dict(name=name,
                kind='sig. point',
                pos=pos))
              
    for manual in """PARKS:570014N 0241039E:entry/exit point
VISTA:565002N 0241034E:entry/exit point
ARNIS:565427N 0234611E:entry/exit point
KISHI:565609N 0234608E:entry/exit point
HOLDING WEST:565530N 0235327E:holding point
HOLDING EAST:565351N 0240313E:holding point""".split("\n"):
        name,poss,kind=manual.split(":")
        out.append(dict(
            name=name.strip(),
            pos=mapper.parsecoord(poss),            
            kind=kind))
        

    return out
Example #9
0
def ep_parse_wikipedia_airports(url):
    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(url, country="wikipedia")
    parser.feed(data)
    tree = parser.close()
    res = []
    for table in tree.xpath("//table"):
        for nr, row in enumerate(table.xpath(".//tr")):
            cols = list([alltext(x) for x in row.xpath(".//td")])
            print "#", nr, ": ", cols
            if nr == 0:
                if len(cols) == 0 or cols[0].strip() != "Airport":
                    break
                assert cols[3].strip() == "ICAO"
                assert cols[4].strip() == "Purpose"
                assert cols[5].strip().count("El")
                assert cols[9].strip() == "Coordinates"
            else:
                purpose = cols[4].strip()
                if purpose.count("Unused"): continue
                if purpose.count("Closed"): continue
                if purpose.count("Liquidated"): continue
                if purpose == "Military": continue  #Just military
                icao = cols[3].strip()
                if icao == "": icao = "ZZZZ"
                name = cols[0].strip()
                #print "lats:",row.xpath(".//span[@class='latitude']")
                lat, = alltexts(row.xpath(".//span[@class='latitude']"))
                lon, = alltexts(row.xpath(".//span[@class='longitude']"))
                coords = fixup(lat.strip() + " " + lon.strip())
                elevft = float(cols[5].strip())
                res.append(
                    dict(pos=mapper.parsecoord(coords),
                         name=name,
                         elev=elevft / 0.3048,
                         icao=icao,
                         date=date,
                         url=url))

    return res
Example #10
0
def ev_parse_obst():
    """Scrape the obstacle list from the Latvian eAIP ENR 5.4 page.

    Returns a list of dicts (name, pos, height, elev, lighting, kind),
    one per five-column obstacle table row.
    """
    cur_airac = get_cur_airac()
    url = "/eAIPfiles/%s-AIRAC/html/eAIP/EV-ENR-5.4-en-GB.html" % (cur_airac, )
    #url="/EV-ENR-5.4-en-GB.html"
    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(url, country="ev")
    parser.feed(data)
    tree = parser.close()
    got_fir = False
    res = []
    for table in tree.xpath("//table"):
        for row in table.xpath(".//tr"):
            tds = row.xpath(".//td")
            # Obstacle rows have exactly five cells; skip headers etc.
            if len(tds) != 5: continue
            name, type, coord, elev, light = [alltext(x) for x in tds]
            # The elevation cell holds "elevation/height above ground".
            elev, height = elev.split("/")
            res.append(
                dict(name=name,
                     pos=mapper.parsecoord(coord),
                     height=mapper.parse_elev(height.strip()),
                     elev=mapper.parse_elev(elev),
                     lighting=light,
                     kind=type))
    return res
Example #11
0
def parse_info():
    out=[]
    for fname in os.listdir("fplan/extract/ads"):
        print "Processing",fname
        p=os.path.join("fplan/extract/ads",fname)
        
        data=open(p).read()
        parser=lxml.html.HTMLParser()
        parser.feed(data)
        tree=parser.close()
        ai=None
        for table in tree.xpath(".//table"):
            #print "New table in",fname
            anyap=False
            for idx,tr in enumerate(table.xpath("tr")):
                tds=[alltext(td).strip() for td in tr.xpath("td|th")]
                if idx==0:
                    for i,td in enumerate(tds):
                        if td.lower().count("airport") or td.lower().count("name"):
                            #print "Td",td,"contains airport",i
                            ai=i
                            break
                    #print "head tds:",tds
                    #assert(ai!=None)
                    #print "ai=",ai
                    continue
                icao=None
                #print "Reg row",repr(tds)
                for i,td in enumerate(tds):
                    m=re.match(r".*\b([A-Z]{4})\b.*",td)                    
                    if m:
                        #print "Match:",tds
                        possicao,=m.groups()
                        if possicao=='ICAO' or possicao=='IATA': 
                            continue
                        possname=tds[ai]
                        if len(possname)<=3:
                            continue
                        name=possname
                        icao=possicao
                        break
                if icao:
                    if type(name)!=unicode:
                        name=unicode(name,"utf8")
                    
                    out.append((name,icao))
                    anyap=True
                    
            if anyap:
                ap=False
                for name,icao in out:
                    if name.lower().count("airport"):
                        ap=True
                if ap==False:
                    print "-------------------------------------"
                    for name,icao in out:
                        print name,icao
                    print "^None of the airport names contained the word airport:",fname
                    raise Exception("Not any airport name")
                
        
    out.append((u"Isle of Man Airport",u"EGNS"))
    out.append((u"Guernsey Airport",u"EGJB"))
    out.append((u"Jersey",u"EGJJ"))
    return out
def ee_parse_airfields2():
    """Scrape Estonian aerodrome data from the eAIP AD pages.

    Collects ICAO codes from the AD 0.6 overview page (heliports are
    skipped), then for each aerodrome parses its AD 2 page for name,
    ARP position, CTR/TIZ/FIZ airspace, charts, runway thresholds,
    ATS frequencies and the general-information document, accumulating
    results into the local `ads` and `spaces` lists.

    NOTE(review): no return statement is visible at the end of this
    excerpt, so the built lists are not returned here as shown.
    """
    ads = []
    spaces = []
    airac_date = get_airac_date()
    print "airac", airac_date
    overview_url = "/%s/html/eAIP/EE-AD-0.6-en-GB.html" % (airac_date, )

    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(overview_url, country='ee')
    parser.feed(data)
    tree = parser.close()
    icaos = []
    # Harvest EExx ICAO codes from the overview headings, skipping
    # entries marked HELIPORT.
    for cand in tree.xpath(".//h3"):
        txts = alltexts(cand.xpath(".//a"))
        aps = re.findall(r"EE[A-Z]{2}", " ".join(txts))
        if aps:
            icao, = aps
            if alltext(cand).count("HELIPORT"):
                print "Ignore heliport", icao
                continue
            icaos.append(icao)

    for icao in icaos:
        ad = dict(icao=icao)
        url = "/%s/html/eAIP/EE-AD-2.%s-en-GB.html" % (airac_date, icao)
        data, date = fetchdata.getdata(url, country='ee')
        parser.feed(data)
        tree = parser.close()
        thrs = []

        # Aerodrome name comes from the "EEXX — Name" page heading.
        for h3 in tree.xpath(".//h3"):
            txt = alltext(h3)
            print repr(txt)
            ptrn = ur"\s*%s\s+[—-]\s+(.*)" % (unicode(icao.upper()), )
            m = re.match(ptrn, txt, re.UNICODE)
            if m:
                assert not 'name' in ad
                ad['name'] = m.groups()[0]

        # ARP position from the "coordinates and site" table row.
        for tr in tree.xpath(".//tr"):
            txt = alltext(tr)
            m = re.match(r".*coordinates\s*and\s*site.*(\d{6}N\s*\d{7}E).*",
                         txt)
            #print "Matching,",txt,":",m
            if m:
                crds, = m.groups()
                ad['pos'] = mapper.anyparse(crds)

        # CTR/TIZ/FIZ airspace: lateral limits then vertical limits.
        space = dict()
        for table in tree.xpath(".//table"):
            for tr in table.xpath(".//tr"):
                trtxt = alltext(tr)
                if trtxt.count("Designation and lateral limits"):
                    space = dict()
                    coords = tr.getchildren()[2]
                    lines = alltext(coords).split("\n")
                    if lines[0].strip() == 'NIL':
                        continue

                    # First line is "<zone name> <CTR|TIZ|FIZ> [spill]";
                    # spill-over text belongs to the boundary string.
                    zname, what, spill = re.match(ur"(.*)\s+(CTR|TIZ|FIZ)(.*)",
                                                  lines[0]).groups()
                    if spill and spill.strip():
                        rest = [spill] + lines[1:]
                    else:
                        rest = lines[1:]
                    what = what.strip()
                    assert ad['name'].upper().strip().count(
                        zname.upper().strip())
                    assert what in ['FIZ', 'TIZ', 'CTR']
                    space['type'] = what
                    space['points'] = mapper.parse_coord_str("\n".join(rest))

                    space['name'] = zname + " " + what
                    space['date'] = date
                    space['url'] = fetchdata.getrawurl(url, 'ee')

                if trtxt.count("Vertical limits"):
                    vlim = alltext(tr.getchildren()[2])
                    if vlim.strip() == 'NIL': continue
                    space['floor'], space['ceiling'] = vlim.split(" to ")

                #space['freqs']=x

        # Charts: match chart-name rows and hand PDF links to
        # parse_landing_chart.help_plc with the matching variant.
        #hlc=False
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.lower().count("charts"):
                par = h4.getparent()
                for table in par.xpath(".//table"):
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        name,page=\
                            tr.getchildren()
                        nametxt = alltext(name)
                        print "nametxt:", nametxt, "link:"
                        for reg, variant in [
                            (r"Aerodrome.*Chart.*", ""),
                            (r"Landing.*Chart.*", "landing"),
                            (r".*Parking.*Chart.*", "parking"),
                            (r".*Visual.*Approach.*|.*\bVAC\b.*", "vac")
                        ]:
                            if re.match(reg, nametxt):
                                for a in page.xpath(".//a"):
                                    print "linklabel", a.text
                                    print "attrib:", a.attrib
                                    href = a.attrib['href']
                                    print "Bef repl", href
                                    if href.lower().endswith("pdf"):
                                        # Rewrite the relative graphics path
                                        # into the AIRAC-dated absolute one.
                                        href = href.replace(
                                            "../../graphics",
                                            "/%s/graphics" % (airac_date, ))
                                        print "href:", href, airac_date
                                        assert href
                                        parse_landing_chart.help_plc(
                                            ad,
                                            href,
                                            icao,
                                            ad['pos'],
                                            "ee",
                                            variant=variant)
                                        """arp=ad['pos']
                                        lc=parse_landing_chart.parse_landing_chart(
                                                href,
                                                icao=icao,
                                                arppos=arp,country="ee")
                                        assert lc
                                        if lc:
                                            ad['adcharturl']=lc['url']
                                            ad['adchart']=lc
                                            hlc=True
                                            #chartblobnames.append(lc['blobname'])
                                        """
        #assert hlc
        # Runway thresholds from the RUNWAY PHYSICAL CHARACTERISTICS table.
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.count("RUNWAY PHYSICAL"):
                par = h4.getparent()

                for table in par.xpath(".//table"):
                    prevnametxt = ""
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        if idx == 0:
                            fc = alltext(tr.getchildren()[0])
                            print "FC", fc
                            if not fc.count("Designations"):
                                break  #skip table
                        if idx < 2: continue
                        if len(tr.getchildren()) == 1: continue
                        print "c:", tr.getchildren(), alltexts(
                            tr.getchildren())
                        desig, trubrg, dims, strength, thrcoord, threlev = tr.getchildren(
                        )
                        rwy = re.match(r"(\d{2}[LRC]?)", alltext(desig))
                        altc = alltext(thrcoord)
                        print "Matching", altc
                        print "rwymatch:", alltext(desig)
                        m = re.match(r"\s*(\d+\.?\d*N)[\s\n]*(\d+\.?\d*E).*",
                                     altc, re.DOTALL | re.MULTILINE)
                        if m:
                            lat, lon = m.groups()
                            print "Got latlon", lat, lon
                            thrs.append(
                                dict(pos=mapper.parse_coords(lat, lon),
                                     thr=rwy.groups()[0]))

        # ATS frequencies: harvest (callsign, MHz) pairs for the airspace.
        space['freqs'] = []
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.count("ATS COMMUNICATION"):
                par = h4.getparent()
                for table in par.xpath(".//table"):
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        print "cs", repr(tr.getchildren()), alltexts(
                            tr.getchildren())
                        print len(tr.getchildren())
                        if len(tr.getchildren()) != 5:
                            if "".join(alltexts(
                                    tr.getchildren())).count(u"EMERG"):
                                continue  #Sometimes emergency freq is listed, and then it is without callsign
                        service,callsign,frequency,hours,remarks=\
                            tr.getchildren()
                        callsigntxt = alltext(callsign)
                        if idx < 2:
                            # First two rows are headers.
                            if idx == 0:
                                assert callsigntxt.strip() == "Call sign"
                            if idx == 1:
                                assert callsigntxt.strip() == "2"
                            continue
                        ftext = alltext(frequency)
                        print "matching freq", ftext
                        for freq in re.findall(ur"\b\d{3}\.\d{1,3}", ftext):
                            freqmhz = float(freq)
                            space['freqs'].append(
                                (callsigntxt.strip(), freqmhz))

        # Only keep airspaces that actually got lateral limits.
        if space and 'points' in space:
            assert 'freqs' in space
            assert 'points' in space
            assert 'floor' in space
            assert 'ceiling' in space
            assert 'type' in space
            spaces.append(space)
        if thrs:
            ad['runways'] = rwy_constructor.get_rwys(thrs)

        # Attach the general-information text document to the aerodrome.
        aip_text_documents.help_parse_doc(ad,
                                          url,
                                          icao,
                                          "ee",
                                          title="General Information",
                                          category="general")

        ad['date'] = date
        ad['url'] = fetchdata.getrawurl(url, 'ee')
        print "AD:", ad
        assert 'pos' in ad
        assert 'name' in ad
        ads.append(ad)
Example #13
0
def ev_parse_airfields():
    ads=[]
    spaces=[]
    seen=set()
    cur_airac=get_cur_airac()
    assert cur_airac
    for icao in ["EVRA",
                "EVLA",
                "EVTJ",
                "EVVA"]:
        thrs=[]
        url="/eAIPfiles/%s-AIRAC/html/eAIP/EV-AD-2.%s-en-GB.html"%(cur_airac,icao)
        data,date=fetchdata.getdata(url,country='ev')
        parser=lxml.html.HTMLParser()
        parser.feed(data)
        tree=parser.close()
        elev=None
        pos=None
        ctrarea=None
        ctr=None
        ctralt=None
        ctrname=None
        adcharturl=None
        adchart=None
        adnametag,=tree.xpath("//p[@class='ADName']")
        adnamestr=alltext(adnametag)
        print adnamestr
        name,=re.match(ur"%s\s*[-—]\s*([\w\s]+)"%(icao,),adnamestr,re.UNICODE).groups()
        freqs=[]
        for table in tree.xpath("//table"):
            rows=list(table.xpath(".//tr"))
            
            headings=list(table.xpath(".//th"))
            
            if len(headings)==5:
                if headings[2]=="Frequency":
                    for row in rows:
                        cols=alltexts(table.xpath(".//td"))
                        desig,name=cols[0:2]
                        freq,=re.match(ur"\d{3}\.\d{3}\s*MHz",cols[2]).groups()
                        if freq!="121.500":
                            freqs.append((desig+" "+name,float(freq)))                        
                        
                    continue
                
            
            for row in rows:
                cols=alltexts(row.xpath(".//td"))
                print "cols:",repr(cols)
                if len(cols)<2: continue
                if not pos and re.match(ur".*ARP\s*coordinates.*",cols[1]):
                    pos,=mapper.parsecoords(cols[2])
                if not elev and re.match(ur"Elevation.*",cols[1]):
                    elev,=re.match(ur"(\d+) FT.*",cols[2]).groups()
                
                if not ctr and re.match(ur"Designation\s*and\s*lateral\s*limits",cols[1]):
                    lines=cols[2].split("\n")
                    ctr=True
                    print "Got lateral limits",lines[0]
                    try:
                        ctrname,type_=re.match(ur"^([\w\s]+)(CTR|TIZ)",lines[0]).groups()
                        ctrarea=" ".join(lines[1:])
                    except:
                        ctrname,type_=re.match(ur"^([\w\s]+)(CTR|TIZ)",lines[0]+lines[1]).groups()
                        ctrarea=" ".join(lines[2:])
                    assert ctrname.strip()
                    ctrname=ctrname.strip()+" "+type_
                    
                #print ".",cols[1],"."
                if not ctralt and re.match(ur".*Vertical\s*limits.*",cols[1],re.UNICODE):
                    ctralt=True
                    #print "<",cols[2],">"
                    alts=cols[2].split("/")
                    if len(alts)==1:                    
                        ceiling=alts[0]
                        floor="GND"
                    else:
                        ceiling,floor=alts
                    print "Parsed",ceiling,floor
Example #14
0
                if not ctralt and re.match(ur".*Vertical\s*limits.*",cols[1],re.UNICODE):
                    ctralt=True
                    #print "<",cols[2],">"
                    alts=cols[2].split("/")
                    if len(alts)==1:                    
                        ceiling=alts[0]
                        floor="GND"
                    else:
                        ceiling,floor=alts
                    print "Parsed",ceiling,floor


             
                    
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.count("RUNWAY PHYSICAL"):
                par=h4.getparent()

                for table in par.xpath(".//table"):
                    prevnametxt=""
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        if idx==0:
                            fc=alltext(tr.getchildren()[0])
                            print "FC",fc
                            if not fc.count("Designations"):
                                break #skip table
                        if idx<2:continue
                        if len(tr.getchildren())==1:continue
                        print "c:",tr.getchildren(),alltexts(tr.getchildren())
                        desig,trubrg,dims,strength,thrcoord,threlev=tr.getchildren()
Example #15
0
def parse_info():
    """Extract (airport name, ICAO code) pairs from HTML files saved
    under fplan/extract/ads, plus a few hand-curated additions.

    Raises Exception when a file yields airports but none of the
    accumulated names contains the word "airport" (layout sanity check).
    """
    out = []
    for fname in os.listdir("fplan/extract/ads"):
        print "Processing", fname
        p = os.path.join("fplan/extract/ads", fname)

        data = open(p).read()
        parser = lxml.html.HTMLParser()
        parser.feed(data)
        tree = parser.close()
        ai = None
        for table in tree.xpath(".//table"):
            #print "New table in",fname
            anyap = False
            for idx, tr in enumerate(table.xpath("tr")):
                tds = [alltext(td).strip() for td in tr.xpath("td|th")]
                if idx == 0:
                    # Header row: locate the column holding the name.
                    for i, td in enumerate(tds):
                        if td.lower().count("airport") or td.lower().count(
                                "name"):
                            #print "Td",td,"contains airport",i
                            ai = i
                            break
                    #print "head tds:",tds
                    #assert(ai!=None)
                    #print "ai=",ai
                    continue
                icao = None
                #print "Reg row",repr(tds)
                # A cell containing a standalone four-capital-letter word
                # is taken as the ICAO code (excluding literal headers).
                for i, td in enumerate(tds):
                    m = re.match(r".*\b([A-Z]{4})\b.*", td)
                    if m:
                        #print "Match:",tds
                        possicao, = m.groups()
                        if possicao == 'ICAO' or possicao == 'IATA':
                            continue
                        possname = tds[ai]
                        if len(possname) <= 3:
                            continue
                        name = possname
                        icao = possicao
                        break
                if icao:
                    if type(name) != unicode:
                        name = unicode(name, "utf8")

                    out.append((name, icao))
                    anyap = True

            if anyap:
                # Sanity check: at least one harvested name should contain
                # the word "airport"; otherwise the table layout changed.
                ap = False
                for name, icao in out:
                    if name.lower().count("airport"):
                        ap = True
                if ap == False:
                    print "-------------------------------------"
                    for name, icao in out:
                        print name, icao
                    print "^None of the airport names contained the word airport:", fname
                    raise Exception("Not any airport name")

    # Hand-curated additions not present in the scraped files.
    out.append((u"Isle of Man Airport", u"EGNS"))
    out.append((u"Guernsey Airport", u"EGJB"))
    out.append((u"Jersey", u"EGJJ"))
    return out
Example #16
0
def ev_parse_x(url):
    """Parse Latvian restricted/TSA/TRA/ATZ airspace from the eAIP page
    at *url*.

    Returns a list of airspace dicts (name, points, type="R", freqs,
    floor, ceiling, url, date).  Spaces lying entirely at or above
    9500 ft are dropped.
    """
    out = []
    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(url, country="ev")
    parser.feed(data)
    tree = parser.close()
    got_fir = False
    for table in tree.xpath("//table"):
        #print "Table with %d children"%(len(table.getchildren()),)
        rows = list(table.xpath(".//tr"))

        #for idx,col in enumerate(cols):
        #    print "Col %d, %s"%(idx,alltext(col)[:10])
        headingcols = rows[0].xpath(".//th")
        if len(headingcols) == 0: continue
        name, alt = headingcols[0:2]
        # QNH tables with many columns are not airspace tables; skip.
        if alltext(name).count("QNH") and len(headingcols) > 6:
            continue
        print alltext(name)
        assert alltext(name).lower().count("name") or alltext(
            name).lower().count("lateral")
        print alltext(alt)
        assert alltext(alt).lower().count("limit")

        for row in rows[1:]:
            cols = list(row.xpath(".//td"))
            if len(cols) < 2: continue
            name, alt = cols[:2]
            lines = [x.strip() for x in alltext(name).split("\n") if x.strip()]
            if len(lines) == 0: continue
            assert len(lines)

            spacename = lines[0].strip()
            # Special-case a row where name and boundary text are fused.
            if spacename.strip(
            ) == "A circle radius 0,5 NM centered on 565705N 0240619E EVR2 RIGA":
                spacename = "EVR2 RIGA"
                lines = [spacename, lines[0][:-len(spacename)].strip()
                         ] + lines[1:]
            print spacename
            if spacename.strip() == "SKRIVERI":
                continue
            print "Spacename is:", spacename
            assert spacename[:3] in ["EVR","EVP","TSA","TRA"] or \
                spacename.endswith("ATZ") or \
                spacename.endswith("ATZ (MILITARY)")

            # Collect altitude lines, ignoring "Real-time" remarks.
            altcand = []
            for altc in alltext(alt).split("\n"):
                if altc.count("Real-time"): continue
                altcand.append(altc.strip())
            print "Altcands:", altcand
            ceiling, floor = [x.strip() for x in " ".join(altcand).split("/")]
            ceiling = strangefix(ceiling)
            floor = strangefix(floor)

            mapper.parse_elev(ceiling)
            ifloor = mapper.parse_elev(floor)
            iceiling = mapper.parse_elev(ceiling)
            # Skip spaces lying entirely at/above 9500 ft.
            if ifloor >= 9500 and iceiling >= 9500:
                continue
            assert ifloor < iceiling

            freqs = []
            # Remaining lines describe the boundary polygon.
            raw = " ".join(lines[1:])
            raw = re.sub(
                s(ur"Area bounded by lines successively joining the following points:"
                  ), "", raw)
            print "Raw:", raw

            coords = mapper.parse_coord_str(raw, context='latvia')
            for cleaned in clean_up_polygon(coords):
                out.append(
                    dict(name=spacename,
                         points=cleaned,
                         type="R",
                         freqs=freqs,
                         floor=floor,
                         url=url,
                         date=date,
                         ceiling=ceiling))

    return out
def ee_parse_airfields2():
    """Scrape Estonian eAIP AD (aerodrome) pages into airfield dicts.

    Collects ICAO codes from the AD 0.6 overview page, then for each
    airfield parses name, ARP position, any CTR/TIZ/FIZ airspace,
    chart links, runway thresholds and ATS frequencies.

    NOTE(review): builds `ads` and `spaces` but has no return statement --
    the snippet looks truncated; presumably it should end with
    `return ads, spaces`.  Confirm against the original module.
    """
    ads=[]
    spaces=[]
    airac_date=get_airac_date()
    print "airac",airac_date
    overview_url="/%s/html/eAIP/EE-AD-0.6-en-GB.html"%(airac_date,)
        
    parser=lxml.html.HTMLParser()
    data,date=fetchdata.getdata(overview_url,country='ee')
    parser.feed(data)
    tree=parser.close()
    icaos=[]
    # Harvest EExx ICAO codes from the overview <h3> headings, skipping
    # anything labelled HELIPORT.
    for cand in tree.xpath(".//h3"):
        txts=alltexts(cand.xpath(".//a"))
        aps=re.findall(r"EE[A-Z]{2}"," ".join(txts))
        if aps:
            icao,=aps
            if alltext(cand).count("HELIPORT"):
                print "Ignore heliport",icao
                continue
            icaos.append(icao)
    
    for icao in icaos:
        ad=dict(icao=icao)
        url="/%s/html/eAIP/EE-AD-2.%s-en-GB.html"%(airac_date,icao)
        data,date=fetchdata.getdata(url,country='ee')
        parser.feed(data)
        tree=parser.close()
        thrs=[]


        
        # Airfield name from the "<ICAO> — <name>" heading.
        for h3 in tree.xpath(".//h3"):
            txt=alltext(h3)
            print repr(txt)
            ptrn=ur"\s*%s\s+[—-]\s+(.*)"%(unicode(icao.upper()),)
            m=re.match(ptrn,txt,re.UNICODE)
            if m:
                assert not 'name' in ad
                ad['name']=m.groups()[0]
                
        # ARP position from the "coordinates and site" row (DDMMSSN DDDMMSSE).
        for tr in tree.xpath(".//tr"):
            txt=alltext(tr)
            m=re.match(r".*coordinates\s*and\s*site.*(\d{6}N\s*\d{7}E).*",txt)
            #print "Matching,",txt,":",m 
            if m:
                crds,=m.groups()
                ad['pos']=mapper.anyparse(crds)
                
        # Associated CTR/TIZ/FIZ airspace: lateral limits row gives name,
        # type and polygon; a separate "Vertical limits" row gives floor/ceiling.
        space=dict()
        for table in tree.xpath(".//table"):
            for tr in table.xpath(".//tr"):
                trtxt=alltext(tr)
                if trtxt.count("Designation and lateral limits"):
                    space=dict()
                    coords=tr.getchildren()[2]
                    lines=alltext(coords).split("\n")
                    if lines[0].strip()=='NIL':
                        continue
                    
                    
                    # First line is "<zone name> <CTR|TIZ|FIZ>[ coords...]";
                    # any spill after the type keyword is part of the geometry.
                    zname,what,spill=re.match(ur"(.*)\s+(CTR|TIZ|FIZ)(.*)",lines[0]).groups()
                    if spill and spill.strip():
                        rest=[spill]+lines[1:]
                    else:
                        rest=lines[1:]
                    what=what.strip()
                    assert ad['name'].upper().strip().count(zname.upper().strip())
                    assert what in ['FIZ','TIZ','CTR']
                    space['type']=what
                    space['points']=mapper.parse_coord_str("\n".join(rest))

                    space['name']=zname+" "+what
                    space['date']=date
                    space['url']=fetchdata.getrawurl(url,'ee')
                 
                    
                if trtxt.count("Vertical limits"):
                    vlim=alltext(tr.getchildren()[2])
                    if vlim.strip()=='NIL': continue
                    space['floor'],space['ceiling']=vlim.split(" to ")
                    
                #space['freqs']=x
                
        #hlc=False
        # Chart links: match chart-name patterns to variants and register
        # each PDF link (relative paths rewritten to the AIRAC graphics dir).
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.lower().count("charts"):
                par=h4.getparent()
                for table in par.xpath(".//table"):
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        name,page=\
                            tr.getchildren()
                        nametxt=alltext(name)
                        print "nametxt:",nametxt,"link:"
                        for reg,variant in [
                                           (r"Aerodrome.*Chart.*","") ,
                                           (r"Landing.*Chart.*","landing"), 
                                           (r".*Parking.*Chart.*","parking"), 
                                           (r".*Visual.*Approach.*|.*\bVAC\b.*","vac")
                                            ]:
                            if re.match(reg,nametxt):
                                for a in page.xpath(".//a"):
                                    print "linklabel",a.text
                                    print "attrib:",a.attrib
                                    href=a.attrib['href']
                                    print "Bef repl",href
                                    if href.lower().endswith("pdf"):
                                        href=href.replace("../../graphics","/%s/graphics"%(airac_date,))
                                        print "href:",href,airac_date
                                        assert href
                                        parse_landing_chart.help_plc(ad,href,
                                                        icao,ad['pos'],"ee",variant=variant)
                                        """arp=ad['pos']
                                        lc=parse_landing_chart.parse_landing_chart(
                                                href,
                                                icao=icao,
                                                arppos=arp,country="ee")
                                        assert lc
                                        if lc:
                                            ad['adcharturl']=lc['url']
                                            ad['adchart']=lc
                                            hlc=True
                                            #chartblobnames.append(lc['blobname'])
                                        """                                                    
        #assert hlc
        # Runway thresholds from the "RUNWAY PHYSICAL" table: designator +
        # threshold coordinates per row.
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.count("RUNWAY PHYSICAL"):
                par=h4.getparent()

                for table in par.xpath(".//table"):
                    prevnametxt=""
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        if idx==0:
                            fc=alltext(tr.getchildren()[0])
                            print "FC",fc
                            if not fc.count("Designations"):
                                break #skip table
                        if idx<2:continue
                        if len(tr.getchildren())==1:continue
                        print "c:",tr.getchildren(),alltexts(tr.getchildren())
                        desig,trubrg,dims,strength,thrcoord,threlev=tr.getchildren()
                        rwy=re.match(r"(\d{2}[LRC]?)",alltext(desig))
                        altc=alltext(thrcoord)
                        print "Matching",altc
                        print "rwymatch:",alltext(desig)
                        m=re.match(r"\s*(\d+\.?\d*N)[\s\n]*(\d+\.?\d*E).*",altc,re.DOTALL|re.MULTILINE)                        
                        if m:
                            lat,lon=m.groups()
                            print "Got latlon",lat,lon
                            thrs.append(dict(pos=mapper.parse_coords(lat,lon),thr=rwy.groups()[0]))         
                        
                                
        # ATS frequencies: (callsign, MHz) pairs from the COMMUNICATION table.
        space['freqs']=[]
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.count("ATS COMMUNICATION"):
                par=h4.getparent()
                for table in par.xpath(".//table"):
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        print "cs",repr(tr.getchildren()),alltexts(tr.getchildren())
                        print len(tr.getchildren())
                        if len(tr.getchildren())!=5:
                            if "".join(alltexts(tr.getchildren())).count(u"EMERG"):
                                continue #Sometimes emergency freq is listed, and then it is without callsign
                        service,callsign,frequency,hours,remarks=\
                            tr.getchildren()
                        callsigntxt=alltext(callsign)
                        if idx<2:
                            # First two rows are headers ("Call sign" / column numbers).
                            if idx==0:
                                assert callsigntxt.strip()=="Call sign"
                            if idx==1:
                                 assert callsigntxt.strip()=="2"
                            continue
                        ftext=alltext(frequency)
                        print "matching freq",ftext
                        for freq in re.findall(ur"\b\d{3}\.\d{1,3}",ftext):
                            freqmhz=float(freq)                            
                            space['freqs'].append((callsigntxt.strip(),freqmhz))
                              
        # Only keep the airspace if a polygon was actually parsed.
        if space and 'points' in space:
            assert 'freqs' in space
            assert 'points' in space
            assert 'floor' in space
            assert 'ceiling' in space
            assert 'type' in space
            spaces.append(space)
        if thrs:
            ad['runways']=rwy_constructor.get_rwys(thrs)
            
        aip_text_documents.help_parse_doc(ad,url,
                        icao,"ee",title="General Information",category="general")
            
        ad['date']=date
        ad['url']=fetchdata.getrawurl(url,'ee')   
        print "AD:",ad
        assert 'pos' in ad
        assert 'name' in ad
        ads.append(ad)
Example #18
0
def ee_parse_tma2():
    spaces = []
    airac_date = get_airac_date()
    url = "/%s/html/eAIP/EE-ENR-2.1-en-GB.html" % (airac_date, )
    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(url, country='ee')
    parser.feed(data)
    tree = parser.close()
    icaos = []

    def nested(tab):
        if tab == None: return False
        if tab.getparent() is None:
            return False
        #print dir(tab)
        if tab.tag == 'table':
            return True
        return nested(tab.getparent())

    for tab in tree.xpath(".//table"):
        print "table alltext:", alltext(tab)
        if nested(tab.getparent()): continue
        firsttr = tab.xpath(".//tr")[0]
        ntext = alltext(firsttr)
        print "firsttr", firsttr
        print "ntext", ntext
        if re.match(ur".*FIR\s*/\s*CTA.*", ntext):
            print "Matches Tallin FIR"
            name = 'TALLIN FIR'
            points = mapper.parse_coord_str(firtxt, context='estonia')
            floor, ceiling = "GND", "FL195"
            space = {}
            space['name'] = name
            space['points'] = points
            space['floor'] = floor
            space['ceiling'] = ceiling
            space['freqs'] = []
            space['icao'] = 'EETT'
            space['type'] = 'FIR'
            space['date'] = date
            space['url'] = fetchdata.getrawurl(url, 'ee')
            spaces.append(space)
            continue
        else:
            name = ntext.strip()
        space = dict(name=name)
        print "Name", name
        assert space['name'].count("TMA") \
            or space['name'].count("FIR")
        if space['name'].count("FIR"):
            type = 'FIR'
        else:
            type = "TMA"
        freqs = []
        points = None
        floor = None
        ceiling = None
        for cand in tab.xpath(".//tr"):
            if len(cand.getchildren()) != 2:
                continue
            nom, what = cand.getchildren()
            whattxt = alltext(what)
            nomtxt = alltext(nom)
            print "nomtxt", nomtxt, "space name", space['name']
            if nomtxt.count("Lateral limits"):
                if space['name'].count("TALLINN TMA 2"):
                    points = mapper.parse_coord_str("""                
                        A circle with radius 20 NM centred on 592448N 0244957E
                        """)
                else:
                    whattxt = whattxt.replace(
                        "then along the territory dividing line between Estonia and Russia to",
                        "- Along the common Estonian/X state boundary to ")
                    print "Fixed up", whattxt
                    points = mapper.parse_coord_str(whattxt, context='estonia')
            if nomtxt.count("Vertical limits"):
                floor, ceiling = whattxt.split(" to ")
            if nomtxt.count("Call sign"):
                callsign = whattxt.split("\n")[0]
            if nomtxt.count("freq"):
                freqs.extend(re.findall(ur"\d+\.\d+\s*MHz"))

        assert points and floor and ceiling
        space['points'] = points
        space['type'] = type
        space['floor'] = floor
        space['ceiling'] = ceiling
        space['freqs'] = []
        space['type'] = type
        space['date'] = date
        space['url'] = fetchdata.getrawurl(url, 'ee')
        for freq in freqs:
            space['freqs'].append((callsign, freq))
        spaces.append(space)
Example #19
0
def ee_parse_gen_r2(url):
    """Parse an Estonian eAIP restricted-area page into airspace dicts.

    Each qualifying table row yields a dict with name/points/floor/
    ceiling/type("R")/freqs/date/url keys; rows whose floor is at or
    above 9500 (presumably ft -- confirm against mapper.parse_elev)
    are skipped.  Returns the list of dicts.
    """
    spaces=[]
    parser=lxml.html.HTMLParser()
    data,date=fetchdata.getdata(url,country='ee')
    parser.feed(data)
    tree=parser.close()
    print "Parsed tree"
    for tab in tree.xpath(".//table"):
        print "Found table"
        for idx,cand in enumerate(tab.xpath(".//tr")):
            if len(cand.getchildren())<3:
                continue
            space=dict()
            #print list(cand.getchildren())
            # Columns: identification+lateral limits, vertical limits, remarks.
            what,vert,remark=list(cand.getchildren())[0:3]         
            whattxt=alltext(what).replace(u"–","-").replace(u"\xa0"," ")
            
            verttxt=alltext(vert)
            
            # Strip all (parenthesised) annotations, repeating until none remain
            # (handles nested/multiple parentheses).
            while True:
                w=re.sub(ur"\(.*?\)","",whattxt)
                if w!=whattxt:
                    whattxt=w 
                    continue
                break
            
            #print idx,whattxt
            # Rows 0-2 are headers; sanity-check their expected captions.
            if idx<3:
                if idx==1: assert (whattxt.count("Identification") or whattxt.count("ateral limits"))
                if idx==2: assert whattxt.strip()=="1"
                continue 
            verttxt=verttxt.replace(u"\xa0",u" ")
            # Vertical limits: ceiling then floor, split by newline or
            # (fallback) double space.
            vertlines=[x for x in verttxt.split("\n") if x.strip()]
            if len(vertlines)==1:
                vertlines=[x for x in verttxt.split("  ") if x.strip()]
            print "Verlintes:",repr(vertlines)
            #print "wha------------------------ t",whattxt
            space['ceiling'],space['floor']=vertlines[:2]
            mapper.parse_elev(space['ceiling'])
            ifloor=mapper.parse_elev(space['floor'])
            if ifloor>=9500: continue
            # Re-join the coordinate description: lines that end mid-phrase
            # (circle definitions, trailing commas, etc.) are merged with the
            # following line; completed lines get a trailing "-" separator.
            lines=whattxt.split("\n")
            out=[]
            merged=""
            for line in lines[1:]:
                line=line.strip().replace(u"–","-")
                if line=="":continue
                if line.endswith("point"):
                    out.append(line+" ")
                    continue
                if line.endswith("ircle with radius of") or line.endswith(",") or line.endswith("on") or line.endswith("radius"):
                    merged=" ".join([merged,line])
                    print "<---Merged:",merged
                    continue
                if merged:
                    line=" ".join([merged,line])
                merged=""
                if not line.endswith("-"):
                    line=line+" -"
                out.append(line+"\n")
            
            space['name']=lines[0].strip()
            w="".join(out)
            print "Parsing:",w
            # EER1's published geometry is unusable: substitute the known
            # text attached to ee_parse_tma2, clipped against the FIR polygon.
            if space['name'].startswith('EER1 '):                
                w=ee_parse_tma2.eer1txt
                fir=mapper.parse_coord_str(ee_parse_tma2.firtxt,context='estonia')
                fir_context=[fir]
                space['points']=mapper.parse_coord_str(w,fir_context=fir_context)
            else:
                space['points']=mapper.parse_coord_str(w,context='estonia')
            space['type']='R'
            space['date']=date
            space['freqs']=[]
            space['url']=fetchdata.getrawurl(url,'ee')            
            spaces.append(space)
    return spaces
Example #20
0
def ee_parse_tma2():
    spaces=[]
    airac_date=get_airac_date()    
    url="/%s/html/eAIP/EE-ENR-2.1-en-GB.html"%(airac_date,)
    parser=lxml.html.HTMLParser()
    data,date=fetchdata.getdata(url,country='ee')
    parser.feed(data)
    tree=parser.close()
    icaos=[]
    def nested(tab):
        if tab==None: return False
        if tab.getparent() is None:
            return False
        #print dir(tab)
        if tab.tag=='table':
            return True
        return nested(tab.getparent())
    for tab in tree.xpath(".//table"):
        print "table alltext:",alltext(tab)
        if nested(tab.getparent()): continue
        firsttr=tab.xpath(".//tr")[0]
        ntext=alltext(firsttr)
        print "firsttr",firsttr
        print "ntext",ntext
        if re.match(ur".*FIR\s*/\s*CTA.*",ntext):
            print "Matches Tallin FIR"
            name='TALLIN FIR'
            points=mapper.parse_coord_str(firtxt,context='estonia')
            floor,ceiling="GND","FL195"
            space={}
            space['name']=name
            space['points']=points
            space['floor']=floor
            space['ceiling']=ceiling
            space['freqs']=[]
            space['icao']='EETT'
            space['type']='FIR'
            space['date']=date
            space['url']=fetchdata.getrawurl(url,'ee')
            spaces.append(space)            
            continue
        else:
            name=ntext.strip()
        space=dict(name=name)
        print "Name",name
        assert space['name'].count("TMA") \
            or space['name'].count("FIR")
        if space['name'].count("FIR"):
            type='FIR'            
        else:
            type="TMA"
        freqs=[]
        points=None
        floor=None
        ceiling=None
        for cand in tab.xpath(".//tr"):
            if len(cand.getchildren())!=2:
                continue
            nom,what=cand.getchildren()            
            whattxt=alltext(what)
            nomtxt=alltext(nom)
            print "nomtxt",nomtxt,"space name",space['name']
            if nomtxt.count("Lateral limits"):
                if space['name'].count("TALLINN TMA 2"):
                    points=mapper.parse_coord_str("""                
                        A circle with radius 20 NM centred on 592448N 0244957E
                        """)
                else:               
                    whattxt=whattxt.replace(
                        "then along the territory dividing line between Estonia and Russia to",
                        "- Along the common Estonian/X state boundary to " 
                        )
                    print "Fixed up",whattxt
                    points=mapper.parse_coord_str(whattxt,context='estonia')
            if nomtxt.count("Vertical limits"):
                floor,ceiling=whattxt.split(" to ")
            if nomtxt.count("Call sign"):
                callsign=whattxt.split("\n")[0]
            if nomtxt.count("freq"):
                freqs.extend(re.findall(ur"\d+\.\d+\s*MHz"))
                
        assert points and floor and ceiling
        space['points']=points
        space['type']=type
        space['floor']=floor
        space['ceiling']=ceiling
        space['freqs']=[]
        space['type']=type
        space['date']=date
        space['url']=fetchdata.getrawurl(url,'ee')
        for freq in freqs:
            space['freqs'].append((callsign,freq))
        spaces.append(space)
Example #21
0
def ev_parse_x(url):
    """Parse a Latvian eAIP restricted/danger-area page into airspace dicts.

    Each qualifying table row yields one dict per cleaned polygon with
    name/points/type("R")/freqs(empty)/floor/ceiling/url/date keys.
    Returns the list of dicts.
    """
    out = []
    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(url, country="ev")
    parser.feed(data)
    tree = parser.close()
    got_fir = False  # NOTE(review): assigned but never used in this function
    for table in tree.xpath("//table"):
        # print "Table with %d children"%(len(table.getchildren()),)
        rows = list(table.xpath(".//tr"))

        # for idx,col in enumerate(cols):
        #    print "Col %d, %s"%(idx,alltext(col)[:10])
        headingcols = rows[0].xpath(".//th")
        if len(headingcols) == 0:
            continue
        name, alt = headingcols[0:2]
        # Skip wide QNH tables; sanity-check that the first two columns are
        # the area name/laterals and the vertical limits.
        if alltext(name).count("QNH") and len(headingcols) > 6:
            continue
        print alltext(name)
        assert alltext(name).lower().count("name") or alltext(name).lower().count("lateral")
        print alltext(alt)
        assert alltext(alt).lower().count("limit")

        for row in rows[1:]:
            cols = list(row.xpath(".//td"))
            if len(cols) < 2:
                continue
            name, alt = cols[:2]
            lines = [x.strip() for x in alltext(name).split("\n") if x.strip()]
            if len(lines) == 0:
                continue
            assert len(lines)

            spacename = lines[0].strip()
            # Special case: geometry and name are fused on one line; split
            # the trailing "EVR2 RIGA" off and keep the circle text as line 2.
            if spacename.strip() == "A circle radius 0,5 NM centered on 565705N 0240619E EVR2 RIGA":
                spacename = "EVR2 RIGA"
                lines = [spacename, lines[0][: -len(spacename)].strip()] + lines[1:]
            print spacename
            if spacename.strip() == "SKRIVERI":
                continue
            print "Spacename is:", spacename
            assert (
                spacename[:3] in ["EVR", "EVP", "TSA", "TRA"]
                or spacename.endswith("ATZ")
                or spacename.endswith("ATZ (MILITARY)")
            )

            # Vertical limits appear as "<ceiling> / <floor>"; drop any
            # "Real-time ..." activation notes first.
            altcand = []
            for altc in alltext(alt).split("\n"):
                if altc.count("Real-time"):
                    continue
                altcand.append(altc.strip())
            print "Altcands:", altcand
            ceiling, floor = [x.strip() for x in " ".join(altcand).split("/")]
            ceiling = strangefix(ceiling)
            floor = strangefix(floor)

            mapper.parse_elev(ceiling)
            ifloor = mapper.parse_elev(floor)
            iceiling = mapper.parse_elev(ceiling)
            # Skip areas lying entirely at/above 9500 (presumably ft -- confirm).
            if ifloor >= 9500 and iceiling >= 9500:
                continue
            assert ifloor < iceiling

            freqs = []
            raw = " ".join(lines[1:])
            raw = re.sub(s(ur"Area bounded by lines successively joining the following points:"), "", raw)
            print "Raw:", raw

            coords = mapper.parse_coord_str(raw, context="latvia")
            for cleaned in clean_up_polygon(coords):
                out.append(
                    dict(
                        name=spacename,
                        points=cleaned,
                        type="R",
                        freqs=freqs,
                        floor=floor,
                        url=url,
                        date=date,
                        ceiling=ceiling,
                    )
                )

    return out
Example #22
0
def ev_parse_tma():
    """Parse Latvian eAIP ENR 2.1 (RIGA FIR/UIR and TMAs) into airspace dicts.

    Each row yields one dict per cleaned polygon with name/points/type/
    freqs/floor/ceiling/url/date keys; the FIR additionally gets
    icao="EVRR".

    NOTE(review): builds `out` but has no return statement -- the snippet
    looks truncated; presumably it should end with `return out`.
    """
    out = []
    parser = lxml.html.HTMLParser()
    # url="/Latvia_EV-ENR-2.1-en-GB.html"
    cur_airac = get_cur_airac()
    url = "/eAIPfiles/%s-AIRAC/html/eAIP/EV-ENR-2.1-en-GB.html" % (cur_airac,)

    data, date = fetchdata.getdata(url, country="ev")
    parser.feed(data)
    tree = parser.close()

    got_fir = False
    for table in tree.xpath("//table"):
        # print "Table with %d children"%(len(table.getchildren()),)
        rows = list(table.xpath(".//tr"))
        # Locate the 5-column heading row within the first five rows.
        for idx in xrange(5):
            headingrow = rows[idx]
            cols = list(headingrow.xpath(".//th"))
            # print len(cols)
            if len(cols) == 5:
                break
        else:
            raise Exception("No heading row")
        assert idx == 0
        # for idx,col in enumerate(cols):
        #    print "Col %d, %s"%(idx,alltext(col)[:10])
        nameh, unith, callsignh, freqh, remarkh = cols
        assert alltext(nameh).lower().count("name")
        assert alltext(unith).lower().count("unit")
        assert re.match(ur"call\s*sign", alltext(callsignh).lower())
        lastcols = None
        for row in rows[1:]:
            cols = list(row.xpath(".//td"))
            if len(cols) == 5:
                name, unit, callsign, freq, remark = cols
                lastcols = cols
            else:
                # Continuation row (e.g. rowspan): reuse the previous row's
                # unit/callsign/freq/remark columns.
                if lastcols:
                    unit, callsign, freq, remark = lastcols[1:]
                    name = cols[0]
                else:
                    continue

            lines = [x.strip() for x in alltext(name).split("\n") if x.strip()]
            if len(lines) == 0:
                continue
            spacename = lines[0].strip()

            if re.match(ur"RIGA\s*UTA|RIGA\s*CTA|RIGA\s*AOR.*", spacename):
                continue
            freqstr = alltext(freq)
            callsignstr = alltext(callsign)
            # Pair each found MHz value with the (single) callsign; the
            # 121.500 emergency frequency is skipped.
            if freqstr.strip():
                print freqstr
                freqmhzs = re.findall(ur"\d{3}\.\d{3}", freqstr)
                assert len(freqmhzs) <= 2
                callsigns = [callsignstr.split("\n")[0].strip()]
                freqs = []
                for idx, freqmhz in enumerate(freqmhzs):
                    if freqmhz == "121.500":
                        continue
                    freqs.append((callsigns[idx], float(freqmhz)))
                print "freqs:", freqs
            else:
                freqs = []
            assert len(lines)

            # Index of the last "class of airspace" line; the vertical
            # limits sit just above it, the polygon text above those.
            classidx = next(idx for idx, x in reversed(list(enumerate(lines))) if x.lower().count("class of airspace"))

            if re.match(ur"RIGA\s*FIR.*UIR", spacename, re.UNICODE):
                got_fir = True
                lastspaceidx = classidx - 2
                floor = "GND"
                ceiling = "-"
                type_ = "FIR"
            else:
                if lines[classidx - 1].count("/") == 1:
                    floor, ceiling = lines[classidx - 1].split("/")
                    lastspaceidx = classidx - 1
                else:
                    floor = lines[classidx - 1]
                    ceiling = lines[classidx - 2]
                    lastspaceidx = classidx - 2
                ceiling = strangefix(ceiling)
                floor = strangefix(floor)

                mapper.parse_elev(ceiling)
                mapper.parse_elev(floor)
                type_ = "TMA"
            tcoords = lines[1:lastspaceidx]
            # verify that we got actual altitudes:
            coords = []
            for coord in tcoords:
                coord = coord.strip().replace("(counter-)", "").replace("(RIGA DVOR - RIA)", "")
                # Lines ending in a longitude need a "-" separator appended.
                if coord.endswith(u"E") or coord.endswith("W"):
                    coord = coord + " -"
                coords.append(coord)

            raw = " ".join(coords)
            raw = re.sub(s(ur"Area bounded by lines successively joining the following points:"), "", raw)
            print "Raw:", raw
            coords = mapper.parse_coord_str(raw, context="latvia")
            for cleaned in clean_up_polygon(coords):
                out.append(
                    dict(
                        name=spacename,
                        points=cleaned,
                        type=type_,
                        freqs=freqs,
                        floor=floor,
                        url=url,
                        date=date,
                        ceiling=ceiling,
                    )
                )
                if type_ == "FIR":
                    out[-1]["icao"] = "EVRR"
Example #23
0
def ev_parse_tma():
    """Parse Latvian eAIP ENR 2.1 (RIGA FIR/UIR and TMAs) into airspace dicts.

    Each row yields one dict per cleaned polygon with name/points/type/
    freqs/floor/ceiling/url/date keys; the FIR additionally gets
    icao="EVRR".

    NOTE(review): builds `out` but has no return statement -- the snippet
    looks truncated; presumably it should end with `return out`.
    """
    out = []
    parser = lxml.html.HTMLParser()
    #url="/Latvia_EV-ENR-2.1-en-GB.html"
    cur_airac = get_cur_airac()
    url = "/eAIPfiles/%s-AIRAC/html/eAIP/EV-ENR-2.1-en-GB.html" % (cur_airac, )

    data, date = fetchdata.getdata(url, country='ev')
    parser.feed(data)
    tree = parser.close()

    got_fir = False
    for table in tree.xpath("//table"):
        #print "Table with %d children"%(len(table.getchildren()),)
        rows = list(table.xpath(".//tr"))
        # Locate the 5-column heading row within the first five rows.
        for idx in xrange(5):
            headingrow = rows[idx]
            cols = list(headingrow.xpath(".//th"))
            #print len(cols)
            if len(cols) == 5:
                break
        else:
            raise Exception("No heading row")
        assert idx == 0
        #for idx,col in enumerate(cols):
        #    print "Col %d, %s"%(idx,alltext(col)[:10])
        nameh, unith, callsignh, freqh, remarkh = cols
        assert alltext(nameh).lower().count("name")
        assert alltext(unith).lower().count("unit")
        assert re.match(ur"call\s*sign", alltext(callsignh).lower())
        lastcols = None
        for row in rows[1:]:
            cols = list(row.xpath(".//td"))
            if len(cols) == 5:
                name, unit, callsign, freq, remark = cols
                lastcols = cols
            else:
                # Continuation row (e.g. rowspan): reuse the previous row's
                # unit/callsign/freq/remark columns.
                if lastcols:
                    unit, callsign, freq, remark = lastcols[1:]
                    name = cols[0]
                else:
                    continue

            lines = [x.strip() for x in alltext(name).split("\n") if x.strip()]
            if len(lines) == 0: continue
            spacename = lines[0].strip()

            if re.match(ur"RIGA\s*UTA|RIGA\s*CTA|RIGA\s*AOR.*", spacename):
                continue
            freqstr = alltext(freq)
            callsignstr = alltext(callsign)
            # Pair each found MHz value with the (single) callsign; the
            # 121.500 emergency frequency is skipped.
            if freqstr.strip():
                print freqstr
                freqmhzs = re.findall(ur"\d{3}\.\d{3}", freqstr)
                assert len(freqmhzs) <= 2
                callsigns = [callsignstr.split("\n")[0].strip()]
                freqs = []
                for idx, freqmhz in enumerate(freqmhzs):
                    if freqmhz == '121.500': continue
                    freqs.append((callsigns[idx], float(freqmhz)))
                print "freqs:", freqs
            else:
                freqs = []
            assert len(lines)

            # Index of the last "class of airspace" line; the vertical
            # limits sit just above it, the polygon text above those.
            classidx = next(idx for idx, x in reversed(list(enumerate(lines)))
                            if x.lower().count("class of airspace"))

            if re.match(ur"RIGA\s*FIR.*UIR", spacename, re.UNICODE):
                got_fir = True
                lastspaceidx = classidx - 2
                floor = "GND"
                ceiling = "-"
                type_ = "FIR"
            else:
                if lines[classidx - 1].count("/") == 1:
                    floor, ceiling = lines[classidx - 1].split("/")
                    lastspaceidx = classidx - 1
                else:
                    floor = lines[classidx - 1]
                    ceiling = lines[classidx - 2]
                    lastspaceidx = classidx - 2
                ceiling = strangefix(ceiling)
                floor = strangefix(floor)

                mapper.parse_elev(ceiling)
                mapper.parse_elev(floor)
                type_ = "TMA"
            tcoords = lines[1:lastspaceidx]
            #verify that we got actual altitudes:
            coords = []
            for coord in tcoords:
                coord = coord.strip().replace("(counter-)", "").replace(
                    "(RIGA DVOR - RIA)", "")
                # Lines ending in a longitude need a "-" separator appended.
                if coord.endswith(u"E") or coord.endswith("W"):
                    coord = coord + " -"
                coords.append(coord)

            raw = " ".join(coords)
            raw = re.sub(
                s(ur"Area bounded by lines successively joining the following points:"
                  ), "", raw)
            print "Raw:", raw
            coords = mapper.parse_coord_str(raw, context='latvia')
            for cleaned in clean_up_polygon(coords):
                out.append(
                    dict(name=spacename,
                         points=cleaned,
                         type=type_,
                         freqs=freqs,
                         floor=floor,
                         url=url,
                         date=date,
                         ceiling=ceiling))
                if type_ == 'FIR':
                    out[-1]['icao'] = "EVRR"
Example #24
0
def ee_parse_gen_r2(url):
    """Scrape Estonian (EE) AIP restricted/prohibited-area tables from the
    eAIP HTML page at *url* and return a list of airspace dicts.

    Each dict carries: name, points (parsed polygon), ceiling, floor,
    type ('R'), freqs (always empty), date (page retrieval date) and url
    (raw source URL).  Areas whose floor parses to 9500 ft or higher are
    skipped entirely.
    """
    spaces = []
    parser = lxml.html.HTMLParser()
    # getdata fetches (or reads a cached copy of) the page; `date` is the
    # retrieval/publication date attached to every parsed space below.
    data, date = fetchdata.getdata(url, country='ee')
    parser.feed(data)
    tree = parser.close()
    print "Parsed tree"
    for tab in tree.xpath(".//table"):
        print "Found table"
        for idx, cand in enumerate(tab.xpath(".//tr")):
            # Data rows have at least 3 cells: area definition, vertical
            # limits, remarks.  Shorter rows are headers/spacers.
            if len(cand.getchildren()) < 3:
                continue
            space = dict()
            #print list(cand.getchildren())
            what, vert, remark = list(cand.getchildren())[0:3]
            # Normalize en-dashes and non-breaking spaces in the area text.
            whattxt = alltext(what).replace(u"–", "-").replace(u"\xa0", " ")

            verttxt = alltext(vert)

            # Strip parenthesized annotations until a fixpoint is reached;
            # one re.sub pass only removes innermost groups, so looping
            # handles nested parentheses.
            while True:
                w = re.sub(ur"\(.*?\)", "", whattxt)
                if w != whattxt:
                    whattxt = w
                    continue
                break

            #print idx,whattxt
            # The first three table rows are headers; sanity-check the two
            # we recognize (NOTE: asserts vanish under `python -O`).
            if idx < 3:
                if idx == 1:
                    assert (whattxt.count("Identification")
                            or whattxt.count("ateral limits"))
                if idx == 2: assert whattxt.strip() == "1"
                continue
            verttxt = verttxt.replace(u"\xa0", u" ")
            # Vertical limits are usually newline-separated; fall back to
            # double-space splitting when the cell collapsed to one line.
            vertlines = [x for x in verttxt.split("\n") if x.strip()]
            if len(vertlines) == 1:
                vertlines = [x for x in verttxt.split("  ") if x.strip()]
            print "Verlintes:", repr(vertlines)
            #print "wha------------------------ t",whattxt
            # Assumes ceiling is listed before floor in the cell — matches
            # the AIP table layout observed for this page.
            space['ceiling'], space['floor'] = vertlines[:2]
            mapper.parse_elev(space['ceiling'])  # validate only; value unused
            ifloor = mapper.parse_elev(space['floor'])
            # Areas starting at/above 9500 ft are out of scope for this map.
            if ifloor >= 9500: continue
            lines = whattxt.split("\n")
            out = []
            merged = ""
            # Re-join coordinate text that the HTML split mid-sentence:
            # lines ending in known dangling phrases (e.g. "circle with
            # radius of", a trailing comma) are buffered in `merged` and
            # prepended to the next line.  Completed lines get a trailing
            # " -" separator expected by mapper.parse_coord_str.
            for line in lines[1:]:
                line = line.strip().replace(u"–", "-")
                if line == "": continue
                if line.endswith("point"):
                    out.append(line + " ")
                    continue
                if line.endswith("ircle with radius of") or line.endswith(
                        ",") or line.endswith("on") or line.endswith("radius"):
                    merged = " ".join([merged, line])
                    print "<---Merged:", merged
                    continue
                if merged:
                    line = " ".join([merged, line])
                merged = ""
                if not line.endswith("-"):
                    line = line + " -"
                out.append(line + "\n")

            # First line of the cell is the area designator/name.
            space['name'] = lines[0].strip()
            w = "".join(out)
            print "Parsing:", w
            if space['name'].startswith('EER1 '):
                # EER1's published geometry is unparseable here; use the
                # hand-maintained text from ee_parse_tma2 and clip against
                # the FIR boundary instead.
                w = ee_parse_tma2.eer1txt
                fir = mapper.parse_coord_str(ee_parse_tma2.firtxt,
                                             context='estonia')
                fir_context = [fir]
                space['points'] = mapper.parse_coord_str(
                    w, fir_context=fir_context)
            else:
                space['points'] = mapper.parse_coord_str(w, context='estonia')
            space['type'] = 'R'
            space['date'] = date
            space['freqs'] = []
            space['url'] = fetchdata.getrawurl(url, 'ee')
            spaces.append(space)
    return spaces