def ev_parse_tma():
    """Scrape TMA and FIR airspace definitions from the Latvian (EV) eAIP
    page ENR 2.1 (HTML) for the current AIRAC cycle.

    Builds a list of dicts with keys: name, points, type ("TMA"/"FIR"),
    freqs, floor, ceiling, url, date; the FIR entry also gets icao="EVRR".

    NOTE(review): this copy of the function never returns `out` (it falls
    off the end and returns None), and `got_fir` is set but never checked.
    The tail of the original function looks truncated -- confirm against
    the upstream source.
    """
    out = []
    parser = lxml.html.HTMLParser()
    #url="/Latvia_EV-ENR-2.1-en-GB.html"
    cur_airac = get_cur_airac()
    url = "/eAIPfiles/%s-AIRAC/html/eAIP/EV-ENR-2.1-en-GB.html" % (cur_airac, )
    data, date = fetchdata.getdata(url, country='ev')
    parser.feed(data)
    tree = parser.close()
    got_fir = False
    for table in tree.xpath("//table"):
        #print "Table with %d children"%(len(table.getchildren()),)
        rows = list(table.xpath(".//tr"))
        # Locate the heading row: the row among the first five that has
        # exactly 5 <th> cells.
        for idx in xrange(5):
            headingrow = rows[idx]
            cols = list(headingrow.xpath(".//th"))
            #print len(cols)
            if len(cols) == 5:
                break
        else:
            raise Exception("No heading row")
        assert idx == 0
        #for idx,col in enumerate(cols):
        #    print "Col %d, %s"%(idx,alltext(col)[:10])
        nameh, unith, callsignh, freqh, remarkh = cols
        # Sanity-check the column headers before trusting the table layout.
        assert alltext(nameh).lower().count("name")
        assert alltext(unith).lower().count("unit")
        assert re.match(ur"call\s*sign", alltext(callsignh).lower())
        lastcols = None
        for row in rows[1:]:
            cols = list(row.xpath(".//td"))
            if len(cols) == 5:
                name, unit, callsign, freq, remark = cols
                lastcols = cols
            else:
                # Continuation row: only the name cell is present; reuse
                # the unit/callsign/freq/remark cells of the previous
                # complete row.
                if lastcols:
                    unit, callsign, freq, remark = lastcols[1:]
                    name = cols[0]
                else:
                    continue
            lines = [x.strip() for x in alltext(name).split("\n") if x.strip()]
            if len(lines) == 0:
                continue
            spacename = lines[0].strip()
            # UTA/CTA/AOR rows are deliberately skipped here.
            if re.match(ur"RIGA\s*UTA|RIGA\s*CTA|RIGA\s*AOR.*", spacename):
                continue
            freqstr = alltext(freq)
            callsignstr = alltext(callsign)
            if freqstr.strip():
                print freqstr
                freqmhzs = re.findall(ur"\d{3}\.\d{3}", freqstr)
                assert len(freqmhzs) <= 2
                # Only the first call-sign line is kept.
                # NOTE(review): if the cell ever contains two
                # non-121.500 frequencies, callsigns[1] would raise
                # IndexError -- apparently the second frequency is always
                # the 121.500 emergency one in the live data. Confirm.
                callsigns = [callsignstr.split("\n")[0].strip()]
                freqs = []
                for idx, freqmhz in enumerate(freqmhzs):
                    if freqmhz == '121.500':  # skip emergency frequency
                        continue
                    freqs.append((callsigns[idx], float(freqmhz)))
                print "freqs:", freqs
            else:
                freqs = []
            assert len(lines)
            # Index of the last "class of airspace" line, searching from
            # the end of the cell.
            classidx = next(idx for idx, x in reversed(list(enumerate(lines)))
                            if x.lower().count("class of airspace"))
            if re.match(ur"RIGA\s*FIR.*UIR", spacename, re.UNICODE):
                got_fir = True
                lastspaceidx = classidx - 2
                floor = "GND"
                ceiling = "-"
                type_ = "FIR"
            else:
                # Vertical limits appear either as a single "floor/ceiling"
                # line, or as two separate lines (ceiling on the line above
                # the floor).
                if lines[classidx - 1].count("/") == 1:
                    floor, ceiling = lines[classidx - 1].split("/")
                    lastspaceidx = classidx - 1
                else:
                    floor = lines[classidx - 1]
                    ceiling = lines[classidx - 2]
                    lastspaceidx = classidx - 2
                ceiling = strangefix(ceiling)
                floor = strangefix(floor)
                # Parse now so malformed limits fail early (results unused).
                mapper.parse_elev(ceiling)
                mapper.parse_elev(floor)
                type_ = "TMA"
            tcoords = lines[1:lastspaceidx]
            #verify that we got actual altitudes:
            coords = []
            for coord in tcoords:
                coord = coord.strip().replace("(counter-)", "").replace(
                    "(RIGA DVOR - RIA)", "")
                # A coordinate line ending in E/W is missing the trailing
                # "-" separator; add it so the joined string parses.
                if coord.endswith(u"E") or coord.endswith("W"):
                    coord = coord + " -"
                coords.append(coord)
            raw = " ".join(coords)
            raw = re.sub(
                s(ur"Area bounded by lines successively joining the following points:"
                  ), "", raw)
            print "Raw:", raw
            coords = mapper.parse_coord_str(raw, context='latvia')
            for cleaned in clean_up_polygon(coords):
                out.append(
                    dict(name=spacename,
                         points=cleaned,
                         type=type_,
                         freqs=freqs,
                         floor=floor,
                         url=url,
                         date=date,
                         ceiling=ceiling))
                if type_ == 'FIR':
                    out[-1]['icao'] = "EVRR"
def ev_parse_x(url):
    """Scrape restricted/prohibited/TSA/TRA/ATZ areas from the given
    Latvian (EV) eAIP HTML page.

    url -- eAIP-relative page path, fetched via fetchdata.getdata.

    Returns a list of dicts with keys: name, points, type (always "R"),
    freqs (always empty), floor, ceiling, url, date.

    NOTE(review): `got_fir` is set but never used in this function.
    """
    out = []
    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(url, country="ev")
    parser.feed(data)
    tree = parser.close()
    got_fir = False
    for table in tree.xpath("//table"):
        #print "Table with %d children"%(len(table.getchildren()),)
        rows = list(table.xpath(".//tr"))
        #for idx,col in enumerate(cols):
        #    print "Col %d, %s"%(idx,alltext(col)[:10])
        headingcols = rows[0].xpath(".//th")
        if len(headingcols) == 0:
            continue
        name, alt = headingcols[0:2]
        # Skip tables that are QNH/other listings, not airspace tables.
        if alltext(name).count("QNH") and len(headingcols) > 6:
            continue
        print alltext(name)
        # First two headings must be the name/lateral-limits column and
        # the vertical-limits column.
        assert alltext(name).lower().count("name") or alltext(
            name).lower().count("lateral")
        print alltext(alt)
        assert alltext(alt).lower().count("limit")
        for row in rows[1:]:
            cols = list(row.xpath(".//td"))
            if len(cols) < 2:
                continue
            name, alt = cols[:2]
            lines = [x.strip() for x in alltext(name).split("\n") if x.strip()]
            if len(lines) == 0:
                continue
            assert len(lines)
            spacename = lines[0].strip()
            # Special case: this one area has its name at the END of the
            # description; split the name back out and keep the rest as a
            # coordinate line.
            if spacename.strip(
            ) == "A circle radius 0,5 NM centered on 565705N 0240619E EVR2 RIGA":
                spacename = "EVR2 RIGA"
                lines = [spacename, lines[0][:-len(spacename)].strip()
                         ] + lines[1:]
                print spacename
            if spacename.strip() == "SKRIVERI":
                continue
            print "Spacename is:", spacename
            assert spacename[:3] in ["EVR","EVP","TSA","TRA"] or \
                spacename.endswith("ATZ") or \
                spacename.endswith("ATZ (MILITARY)")
            # Collect vertical-limit candidate lines, dropping the
            # "Real-time ..." remark lines.
            altcand = []
            for altc in alltext(alt).split("\n"):
                if altc.count("Real-time"):
                    continue
                altcand.append(altc.strip())
            print "Altcands:", altcand
            ceiling, floor = [x.strip() for x in " ".join(altcand).split("/")]
            ceiling = strangefix(ceiling)
            floor = strangefix(floor)
            mapper.parse_elev(ceiling)
            ifloor = mapper.parse_elev(floor)
            iceiling = mapper.parse_elev(ceiling)
            # Areas entirely above 9500 (outside the chart's interest) are
            # dropped.
            if ifloor >= 9500 and iceiling >= 9500:
                continue
            assert ifloor < iceiling
            freqs = []
            raw = " ".join(lines[1:])
            raw = re.sub(
                s(ur"Area bounded by lines successively joining the following points:"
                  ), "", raw)
            print "Raw:", raw
            coords = mapper.parse_coord_str(raw, context='latvia')
            for cleaned in clean_up_polygon(coords):
                out.append(
                    dict(name=spacename,
                         points=cleaned,
                         type="R",
                         freqs=freqs,
                         floor=floor,
                         url=url,
                         date=date,
                         ceiling=ceiling))
    return out
def ey_parse_tma(): out=[] def emit(name,coordstr,limits,type="TMA",freqs=[],date=datetime(2011,03,25),icao=None): ceiling,floor=limits.split("/") def compact(m): return "".join(m.groups()) coordstr=re.sub(ur"(\d{2,3})\s*(\d{2})\s*(\d{2})",compact,coordstr) coordstr=re.sub(ur"NM from KNA to ","NM from 545740N 0240519E to",coordstr) print coordstr tpoints=mapper.parse_coord_str(coordstr,context='lithuania') f1=mapper.parse_elev(floor) c1=mapper.parse_elev(ceiling) if c1!='-': assert c1>f1 for points in clean_up_polygon(tpoints): out.append( dict( name=name, floor=floor, ceiling=ceiling, freqs=freqs, points=points, type=type ) ) if icao: out[-1]['icao']=icao emit(name=u"Vilnius FIR", limits="-/GND", freqs=[],
def ev_parse_x(url):
    """Scrape restricted/prohibited/TSA/TRA/ATZ areas from the given
    Latvian (EV) eAIP HTML page.

    url -- eAIP-relative page path, fetched via fetchdata.getdata.

    Returns a list of dicts with keys: name, points, type (always "R"),
    freqs (always empty), floor, ceiling, url, date.

    NOTE(review): this is a reformatted duplicate of an ev_parse_x
    defined earlier in this file; at import time this later definition
    shadows the earlier one. `got_fir` is set but never used.
    """
    out = []
    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(url, country="ev")
    parser.feed(data)
    tree = parser.close()
    got_fir = False
    for table in tree.xpath("//table"):
        # print "Table with %d children"%(len(table.getchildren()),)
        rows = list(table.xpath(".//tr"))
        # for idx,col in enumerate(cols):
        #    print "Col %d, %s"%(idx,alltext(col)[:10])
        headingcols = rows[0].xpath(".//th")
        if len(headingcols) == 0:
            continue
        name, alt = headingcols[0:2]
        # Skip tables that are QNH/other listings, not airspace tables.
        if alltext(name).count("QNH") and len(headingcols) > 6:
            continue
        print alltext(name)
        # First two headings must be the name/lateral-limits column and
        # the vertical-limits column.
        assert alltext(name).lower().count("name") or alltext(name).lower().count("lateral")
        print alltext(alt)
        assert alltext(alt).lower().count("limit")
        for row in rows[1:]:
            cols = list(row.xpath(".//td"))
            if len(cols) < 2:
                continue
            name, alt = cols[:2]
            lines = [x.strip() for x in alltext(name).split("\n") if x.strip()]
            if len(lines) == 0:
                continue
            assert len(lines)
            spacename = lines[0].strip()
            # Special case: this one area has its name at the END of the
            # description; split the name back out and keep the rest as a
            # coordinate line.
            if spacename.strip() == "A circle radius 0,5 NM centered on 565705N 0240619E EVR2 RIGA":
                spacename = "EVR2 RIGA"
                lines = [spacename, lines[0][: -len(spacename)].strip()] + lines[1:]
                print spacename
            if spacename.strip() == "SKRIVERI":
                continue
            print "Spacename is:", spacename
            assert (
                spacename[:3] in ["EVR", "EVP", "TSA", "TRA"]
                or spacename.endswith("ATZ")
                or spacename.endswith("ATZ (MILITARY)")
            )
            # Collect vertical-limit candidate lines, dropping the
            # "Real-time ..." remark lines.
            altcand = []
            for altc in alltext(alt).split("\n"):
                if altc.count("Real-time"):
                    continue
                altcand.append(altc.strip())
            print "Altcands:", altcand
            ceiling, floor = [x.strip() for x in " ".join(altcand).split("/")]
            ceiling = strangefix(ceiling)
            floor = strangefix(floor)
            mapper.parse_elev(ceiling)
            ifloor = mapper.parse_elev(floor)
            iceiling = mapper.parse_elev(ceiling)
            # Areas entirely above 9500 (outside the chart's interest) are
            # dropped.
            if ifloor >= 9500 and iceiling >= 9500:
                continue
            assert ifloor < iceiling
            freqs = []
            raw = " ".join(lines[1:])
            raw = re.sub(s(ur"Area bounded by lines successively joining the following points:"), "", raw)
            print "Raw:", raw
            coords = mapper.parse_coord_str(raw, context="latvia")
            for cleaned in clean_up_polygon(coords):
                out.append(
                    dict(
                        name=spacename,
                        points=cleaned,
                        type="R",
                        freqs=freqs,
                        floor=floor,
                        url=url,
                        date=date,
                        ceiling=ceiling,
                    )
                )
    return out
def ev_parse_tma():
    """Scrape TMA and FIR airspace definitions from the Latvian (EV) eAIP
    page ENR 2.1 (HTML) for the current AIRAC cycle.

    Builds a list of dicts with keys: name, points, type ("TMA"/"FIR"),
    freqs, floor, ceiling, url, date; the FIR entry also gets icao="EVRR".

    NOTE(review): this is a reformatted duplicate of an ev_parse_tma
    defined earlier in this file (this later definition shadows it).
    Like that copy, it never returns `out` and `got_fir` is set but never
    checked -- the function tail looks truncated; confirm upstream.
    """
    out = []
    parser = lxml.html.HTMLParser()
    # url="/Latvia_EV-ENR-2.1-en-GB.html"
    cur_airac = get_cur_airac()
    url = "/eAIPfiles/%s-AIRAC/html/eAIP/EV-ENR-2.1-en-GB.html" % (cur_airac,)
    data, date = fetchdata.getdata(url, country="ev")
    parser.feed(data)
    tree = parser.close()
    got_fir = False
    for table in tree.xpath("//table"):
        # print "Table with %d children"%(len(table.getchildren()),)
        rows = list(table.xpath(".//tr"))
        # Locate the heading row: the row among the first five that has
        # exactly 5 <th> cells.
        for idx in xrange(5):
            headingrow = rows[idx]
            cols = list(headingrow.xpath(".//th"))
            # print len(cols)
            if len(cols) == 5:
                break
        else:
            raise Exception("No heading row")
        assert idx == 0
        # for idx,col in enumerate(cols):
        #    print "Col %d, %s"%(idx,alltext(col)[:10])
        nameh, unith, callsignh, freqh, remarkh = cols
        # Sanity-check the column headers before trusting the table layout.
        assert alltext(nameh).lower().count("name")
        assert alltext(unith).lower().count("unit")
        assert re.match(ur"call\s*sign", alltext(callsignh).lower())
        lastcols = None
        for row in rows[1:]:
            cols = list(row.xpath(".//td"))
            if len(cols) == 5:
                name, unit, callsign, freq, remark = cols
                lastcols = cols
            else:
                # Continuation row: only the name cell is present; reuse
                # the unit/callsign/freq/remark cells of the previous
                # complete row.
                if lastcols:
                    unit, callsign, freq, remark = lastcols[1:]
                    name = cols[0]
                else:
                    continue
            lines = [x.strip() for x in alltext(name).split("\n") if x.strip()]
            if len(lines) == 0:
                continue
            spacename = lines[0].strip()
            # UTA/CTA/AOR rows are deliberately skipped here.
            if re.match(ur"RIGA\s*UTA|RIGA\s*CTA|RIGA\s*AOR.*", spacename):
                continue
            freqstr = alltext(freq)
            callsignstr = alltext(callsign)
            if freqstr.strip():
                print freqstr
                freqmhzs = re.findall(ur"\d{3}\.\d{3}", freqstr)
                assert len(freqmhzs) <= 2
                # Only the first call-sign line is kept.
                # NOTE(review): two non-121.500 frequencies would make
                # callsigns[1] raise IndexError -- apparently the second
                # frequency is always the 121.500 emergency one. Confirm.
                callsigns = [callsignstr.split("\n")[0].strip()]
                freqs = []
                for idx, freqmhz in enumerate(freqmhzs):
                    if freqmhz == "121.500":  # skip emergency frequency
                        continue
                    freqs.append((callsigns[idx], float(freqmhz)))
                print "freqs:", freqs
            else:
                freqs = []
            assert len(lines)
            # Index of the last "class of airspace" line, searching from
            # the end of the cell.
            classidx = next(idx for idx, x in reversed(list(enumerate(lines)))
                            if x.lower().count("class of airspace"))
            if re.match(ur"RIGA\s*FIR.*UIR", spacename, re.UNICODE):
                got_fir = True
                lastspaceidx = classidx - 2
                floor = "GND"
                ceiling = "-"
                type_ = "FIR"
            else:
                # Vertical limits appear either as a single "floor/ceiling"
                # line, or as two separate lines (ceiling above floor).
                if lines[classidx - 1].count("/") == 1:
                    floor, ceiling = lines[classidx - 1].split("/")
                    lastspaceidx = classidx - 1
                else:
                    floor = lines[classidx - 1]
                    ceiling = lines[classidx - 2]
                    lastspaceidx = classidx - 2
                ceiling = strangefix(ceiling)
                floor = strangefix(floor)
                # Parse now so malformed limits fail early (results unused).
                mapper.parse_elev(ceiling)
                mapper.parse_elev(floor)
                type_ = "TMA"
            tcoords = lines[1:lastspaceidx]
            # verify that we got actual altitudes:
            coords = []
            for coord in tcoords:
                coord = coord.strip().replace("(counter-)", "").replace("(RIGA DVOR - RIA)", "")
                # A coordinate line ending in E/W is missing the trailing
                # "-" separator; add it so the joined string parses.
                if coord.endswith(u"E") or coord.endswith("W"):
                    coord = coord + " -"
                coords.append(coord)
            raw = " ".join(coords)
            raw = re.sub(s(ur"Area bounded by lines successively joining the following points:"), "", raw)
            print "Raw:", raw
            coords = mapper.parse_coord_str(raw, context="latvia")
            for cleaned in clean_up_polygon(coords):
                out.append(
                    dict(
                        name=spacename,
                        points=cleaned,
                        type=type_,
                        freqs=freqs,
                        floor=floor,
                        url=url,
                        date=date,
                        ceiling=ceiling,
                    )
                )
                if type_ == "FIR":
                    out[-1]["icao"] = "EVRR"
pa['type'] = kind for thirdcol in thirdcols: if thirdcol in d: atc = d[thirdcol] break else: raise Exception("missing thirdcol") #print "ATc: <%s>"%(repr(atc),) freqs = [(y, float(x)) for x, y in re.findall( r"(\d{3}\.\d{3})\s*MHz\n(.*)", "\n".join(atc))] if not freqs: freqs = last_sector.get('freqs', []) #print repr(freqs) pa['freqs'] = freqs #uprint("Cleaning up ",pa['name']) for cleaned in clean_up_polygon(list(pa['points'])): d = dict(pa) #print "cleaned",cleaned for i, tup in enumerate(cleaned): assert type(tup) == str latlon = mapper.from_str(tup) lat, lon = latlon assert lat >= -85 and lat <= 85 d['points'] = cleaned #uprint("cleaned:",pa['name'],len(cleaned),cleaned) #print "name:",d['name'] #print "cleaned points:",d['points'] #print "from:",areacoords #raise Exception() out.append(d) #if pa['name'].lower().count("esrange"):
def parse_page(parser,pagenr,kind="TMA",last_sector=dict()):
    """Parse one page of the Swedish (LFV) AIP PDF into a list of
    airspace dicts.

    parser      -- object whose parse_page_to_items(pagenr) yields laid-out
                   text items (each with .text, .x1, .x2, .y1, .y2)
    pagenr      -- page number to parse
    kind        -- "TMA", "sector" or "R"; selects which third-column
                   heading to look for and how area names are extracted
    last_sector -- mutable dict carrying state (freqs, sector major name)
                   across successive calls.
                   NOTE(review): the mutable default argument is shared
                   between calls that omit it; this statefulness appears
                   to be relied upon -- check callers before "fixing" it.

    Returns a list of dicts with keys name, floor, ceiling, points,
    freqs, type (plus 'established' when present).
    """
    if kind=="TMA":
        thirdcols=["ATC unit","AFIS unit"]
    elif kind=="sector":
        thirdcols=["FREQ"]
    elif kind=="R":
        thirdcols=["Remarks (nature of hazard,"]
    else:
        raise Exception("Bad kind")
    page=parser.parse_page_to_items(pagenr)
    items=page.items
    #print "Items:",pitems
    #print "Possible Areas:"
    # --- Phase 1: find the column headings near the top of the page. ---
    headings=[]
    for item in items:
        if item.text==None: continue
        item.text=item.text.strip()
        if item.text=="": continue
        if item.text=="Name": continue
        if item.y1<25 and item.text in (["Lateral limits","Vertical limits"]+thirdcols):
            headings.append(item)
    headings.sort(key=lambda x:x.x1)
    #print "found candidates:",zone_candidates
    if len(headings)==0:
        return []
    avg_heading_y=sum(h.y1 for h in headings)/float(len(headings))
    uprint("Found headings:",headings)
    # --- Phase 2: collect zone-name candidates (left-margin items below
    # the headings), filtering out boilerplate, remarks and footers. ---
    zone_candidates=[]
    for item in items:
        if item.text==None or item.text.strip()=="": continue
        if item.text.strip().startswith("AMDT"): continue
        if item.text.strip().startswith("The LFV Group"): continue
        if re.match(ur"\s*LFV\s*AIRAC\s*AMDT\s*\d+/\d+\s*",item.text): continue
        if item.text.strip()=="LFV": continue
        if item.text.count('Terminal Information Areas'): continue
        if item.text.strip().startswith("AIRAC"): continue
        if kind=="R" and not is_r_or_danger_area_name(item.text.strip()): continue
        if item.y1>avg_heading_y+1 and item.x1<12 and not item.text in ["Name",'None',"LFV"]:
            # Long blocklist of known non-name fragments seen in the PDFs.
            if item.text.count("Established") or item.text.count(u'TROLLHÄTTAN TWR') or item.text.count(u'and/or SÅTENÄS') or item.text.count(u'TWR/TMC') or item.text.strip().endswith("TWR") or item.text.strip().endswith("TWR."):
                continue
            if item.text.count("operational hours") or item.text.count("See AIP DENMARK"):
                continue
            if item.text.count("hours of"):
                continue
            if item.text.count("Upper limit"):
                continue
            if item.text.count("that part") or item.text.count("coincides"):
                continue
            if item.text.count(u'Danger area EK D395 and') or item.text.count(u'D396 are situated within') or item.text.strip()=="TMA":
                continue
            if item.text.count(u'ÖSTGÖTA TMC is closed') or item.text.count(u'and SKAVSTA TWR is') or item.text.strip()=='open.':
                continue
            if item.text.count("SAT 0530"):
                continue
            if item.text.strip()=='OPS':
                continue
            if item.text.strip()==u'ÖSTGÖTA TMC:':
                continue
            if item.text.count(u'is open') or item.text.count('is closed'):
                continue
            if item.text.count('MON-FRI') or item.text.count('2150'):
                continue
            # Only accept a candidate if there is body text to its right.
            lines2=page.get_lines(page.get_partially_in_rect(12,item.y1+0.2,40,item.y2-0.2))
            if len(lines2):
                zone_candidates.append(item)
    uprint("Found cands:",zone_candidates)
    zone_candidates.sort(key=lambda x:x.y1)
    for zone in zone_candidates:
        #uprint("Zone:",zone)
        #assert not zone.text.count("AOR")
        assert not zone.text.count("FIR")
    uprint("Headings:",headings)
    print "Pagenr:",pagenr
    assert len(headings)==3
    # --- Phase 3: for each candidate, cut out the rectangle under each
    # heading down to the next candidate, and collect its text lines. ---
    ret=[]
    for i in xrange(len(zone_candidates)):
        d=dict()
        cand=zone_candidates[i]
        if i<len(zone_candidates)-1:
            nextcand=zone_candidates[i+1]
        else:
            nextcand=None
        y1=cand.y1-0.25
        y2=100
        if nextcand: y2=nextcand.y1-0.75
        for j in xrange(len(headings)):
            head=headings[j]
            if j<len(headings)-1:
                # NOTE(review): nexthead is computed but never used.
                nexthead=headings[j+1]
            else:
                nexthead=None
            x1=head.x1
            x2=head.x2
            # The last column extends to the page edge.
            if j==len(headings)-1:
                x1=headings[j-1].x2+3
                x2=100
            lines=page.get_lines(page.get_partially_in_rect(x1,y1,x2,y2,xsort=True,ysort=True))
            #print ("Parsed %s y,%d-%d, %s: <%s>\n\n"%(cand.text,y1,y2,head.text,lines)).encode('utf8')
            d[head.text]=lines
        if kind=="R":
            # R-area names sit in the left margin, possibly wrapped.
            if y2==100: y2=y1+3
            d['name']=" ".join(x.strip() for x in filter_head_foot(page.get_lines(page.get_partially_in_rect(0,y1,10,y2,xsort=True,ysort=True))))
        else:
            d['name']=cand.text.strip()
        ret.append(d)
    # --- Phase 4: merge continuation rows (rows whose lateral limits
    # spill past the page grid) into the preceding real row. ---
    allow_head=2
    print "Doing fixups--------------------------------------------------"
    tret=[]
    for x in ret:
        #print "Fixing up",x,"allow:",allow_head
        area="".join(x['Lateral limits']).strip()
        if allow_head==2 and area!="" and x['name'].strip()!="":
            allow_head=1
        if allow_head!=1:
            if len(tret):
                tret[-1]['Lateral limits']+=x['Lateral limits']
                tret[-1]['Vertical limits']+=x['Vertical limits']
        else:
            tret.append(x)
        if allow_head==1:
            allow_head=0
        # A lateral-limits block not ending in "-" is complete; the next
        # non-empty row may start a new area.
        if not area.endswith('-') and area!="":
            allow_head=2
        #print " Fixed up up",x
    ret=tret
    for line in ret:
        print "Fixed:",line['name']," = ",line['Lateral limits'],line['Vertical limits']
    # --- Phase 5: turn each merged row into an airspace dict. ---
    out=[]
    for d in ret:
        pa=dict()
        curname=d['name']
        if curname.count(u'Förteckning över'): continue
        print "D:",d
        arealines=[l for l in d['Lateral limits'] if l.strip()!=""]
        last_coord_idx=None
        #uprint("D:<%s> (area:%s)"%(d,arealines))
        if 'FREQ' in d:
            freqs=[("SWEDEN CONTROL",float(x)) for x in re.findall(r"\d{3}\.\d{3}","\n".join(d['FREQ']))]
            #print "Parsed freqs:",freqs
            if freqs:
                # Remember for later sector rows that list no frequency.
                last_sector['freqs']=freqs
        if kind=='sector':
            m=re.match(r"ES[A-Z]{2}\s*ACC\s*sector\s*([0-9a-zA-Z]*)",d['name'])
            if m:
                last_sector['major']=d['name']
                last_sector['majorsector'],=m.groups()
        if len(arealines)==0:
            last_sector['name']=d['name']
            continue
        if d['name'].count("Control Area and Upper Control Area"): continue
        if d['name'].count("SUECIA CTA"): continue
        if d['name'].count("SUECIA UTA"): continue
        # Expand "<sub-sector>:<suffix>" names to the full major-sector name.
        m=re.match(r"([0-9a-zA-Z]*)(:.*)",d['name'])
        if m and 'majorsector' in last_sector:
            sectorname,sub=m.groups()
            if sectorname==last_sector['majorsector']:
                d['name']=last_sector['major']+sub
                #uprint("Fixed up name: ",d['name'])
        #print "Arealines:",arealines
        assert len(arealines)
        # Strip known boilerplate prefix lines.
        if arealines[0].strip()=="Danger area EK D395 and D396 are":
            arealines=arealines[1:]
        if arealines[0].strip()=="situated within TMA":
            arealines=arealines[1:]
        # NOTE(review): this compares the LIST arealines against strings,
        # so it is always False -- probably arealines[0] was intended.
        if arealines==u'Förteckning över CTA / Lists of CTA' or arealines=='Lateral limits': continue
        # Find where the coordinate list ends ("Established...", "Danger
        # area...", or a stray "LFV" footer).
        for idx in xrange(len(arealines)):
            if arealines[idx].lower().startswith("established"):
                last_coord_idx=idx
                pa['established']=" ".join(l for l in arealines[idx:])
                break
            if arealines[idx].lower().startswith("danger area"):
                last_coord_idx=idx
                break
            if arealines[idx].strip()=="LFV":
                last_coord_idx=idx
                break
        if last_coord_idx==None:
            last_coord_idx=len(arealines)
        #uprint("ARealines:",arealines)
        #uprint("Last coord:",arealines[last_coord_idx-1])
        if len(arealines)>last_coord_idx:
            # One known page has a trailing "-" before an "Established"
            # line; strip it so the coordinate string parses.
            if arealines[last_coord_idx-1:last_coord_idx+1]==[u'571324N 0161129E -', u'Established during operational hours of']:
                arealines[last_coord_idx-1]=arealines[last_coord_idx-1].strip("-")
        #uprint("Last fixed:",arealines[last_coord_idx-1])
        assert not arealines[last_coord_idx-1].strip().endswith("-")
        #for idx in xrange(last_coord_idx-1):
        #    print "arealine: <%s>"%(arealines[idx].strip(),)
        #    assert arealines[idx].strip().endswith("-") or arealines[idx].strip().endswith("to")
        vertlim=u" ".join(d['Vertical limits'])
        if vertlim.strip()=="":
            #print "Object with no vertical limits: %s"%(repr(d['name']),)
            continue
        if d['name']=='Control Area': continue
        uprint("Vertlim: ",vertlim)
        # Pull out FL / ft / GND / UNL tokens; expect exactly ceiling+floor.
        heightst=re.findall(r"(FL\s*\d{3})|(\d+\s*ft\s*(?:\s*/\s*\d+\s*.\s*GND)?(?:\s*GND)?)|(GND)|(UNL)",vertlim)
        uprint("Height candidates:",heightst)
        heights=[]
        for fl,ht,gnd,unl in heightst:
            if fl: heights.append(fl)
            if ht: heights.append(ht.strip())
            if gnd: heights.append(gnd.strip())
            if unl: heights.append(unl.strip())
        uprint("heights for ",d['name'],":",repr(heights))
        # Hard-coded fixups for two Göteborg TMA rows whose limits the
        # PDF extraction misses.
        if len(heights)==0 and d['name']==u'GÖTEBORG TMA':
            heights=['GND','FL95']
        if len(heights)==1 and d['name']==u'Göteborg TMA':
            heights=['4500','FL95']
        assert len(heights)==2
        ceiling=heights[0].strip()
        floor=heights[1].strip()
        pa['name']=d['name']
        pa['floor']=floor
        pa['ceiling']=ceiling
        if mapper.parse_elev(floor)>=9500:
            continue
        #uprint("Arealines:\n================\n%s\n============\n"%(arealines[:last_coord_idx]))
        #print pa
        areacoords=" ".join(arealines[:last_coord_idx])
        pa['points']=parse_coord_str(areacoords)
        # Sanity-check the polygon has a non-degenerate area in mercator
        # space (zoom 13 pixels).
        vs=[]
        for p in pa['points']:
            #print "from_str:",repr(p)
            x,y=mapper.latlon2merc(mapper.from_str(p),13)
            vs.append(Vertex(int(x),int(y)))
        p=Polygon(vvector(vs))
        # NOTE(review): this branch is a no-op debugging hook; the assert
        # below is what actually enforces the minimum area.
        if p.calc_area()<=30*30:
            pass#print pa
            #print "Area:",p.calc_area()
        assert p.calc_area()>30*30
        #print "Area: %f"%(p.calc_area(),)
        #print "Point-counts:",len(pa['points'])
        for p in pa['points']:
            assert p.count(",")==1
        pa['type']=kind
        # Pick the first matching "third column" for this kind.
        for thirdcol in thirdcols:
            if thirdcol in d:
                atc=d[thirdcol]
                break
        else:
            raise Exception("missing thirdcol")
        #print "ATc: <%s>"%(repr(atc),)
        # Extract (callsign, MHz) pairs; frequency precedes the callsign
        # line in the extracted text.
        freqs=[(y,float(x)) for x,y in re.findall(r"(\d{3}\.\d{3})\s*MHz\n(.*)","\n".join(atc))]
        if not freqs:
            # Fall back to frequencies remembered from an earlier sector row.
            freqs=last_sector.get('freqs',[])
        #print repr(freqs)
        pa['freqs']=freqs
        #uprint("Cleaning up ",pa['name'])
        for cleaned in clean_up_polygon(list(pa['points'])):
            d=dict(pa)
            #print "cleaned",cleaned
            # Sanity-check every cleaned point before emitting the area.
            for i,tup in enumerate(cleaned):
                assert type(tup)==str
                latlon=mapper.from_str(tup)
                lat,lon=latlon
                assert lat>=-85 and lat<=85
            d['points']=cleaned
            #uprint("cleaned:",pa['name'],len(cleaned),cleaned)
            #print "name:",d['name']
            #print "cleaned points:",d['points']
            #print "from:",areacoords
            #raise Exception()
            out.append(d)
        #if pa['name'].lower().count("esrange"):
        #    print "Exit esrange"
        #    sys.exit(1)
    return out