def ee_parse_tma2(): spaces=[] airac_date=get_airac_date() url="/%s/html/eAIP/EE-ENR-2.1-en-GB.html"%(airac_date,) parser=lxml.html.HTMLParser() data,date=fetchdata.getdata(url,country='ee') parser.feed(data) tree=parser.close() icaos=[] def nested(tab): if tab==None: return False if tab.getparent() is None: return False #print dir(tab) if tab.tag=='table': return True return nested(tab.getparent()) for tab in tree.xpath(".//table"): print "table alltext:",alltext(tab) if nested(tab.getparent()): continue firsttr=tab.xpath(".//tr")[0] ntext=alltext(firsttr) print "firsttr",firsttr print "ntext",ntext if re.match(ur".*FIR\s*/\s*CTA.*",ntext): print "Matches Tallin FIR" name='TALLIN FIR' points=mapper.parse_coord_str(firtxt,context='estonia') floor,ceiling="GND","FL195" space={} space['name']=name space['points']=points space['floor']=floor space['ceiling']=ceiling space['freqs']=[] space['icao']='EETT' space['type']='FIR' space['date']=date space['url']=fetchdata.getrawurl(url,'ee') spaces.append(space) continue else: name=ntext.strip() space=dict(name=name) print "Name",name assert space['name'].count("TMA") \ or space['name'].count("FIR") if space['name'].count("FIR"): type='FIR' else: type="TMA" freqs=[] points=None floor=None ceiling=None for cand in tab.xpath(".//tr"): if len(cand.getchildren())!=2: continue nom,what=cand.getchildren() whattxt=alltext(what) nomtxt=alltext(nom) print "nomtxt",nomtxt,"space name",space['name'] if nomtxt.count("Lateral limits"): if space['name'].count("TALLINN TMA 2"): points=mapper.parse_coord_str(""" A circle with radius 20 NM centred on 592448N 0244957E """) else: whattxt=whattxt.replace( "then along the territory dividing line between Estonia and Russia to", "- Along the common Estonian/X state boundary to " ) print "Fixed up",whattxt points=mapper.parse_coord_str(whattxt,context='estonia') if nomtxt.count("Vertical limits"): floor,ceiling=whattxt.split(" to ") if nomtxt.count("Call sign"): callsign=whattxt.split("\n")[0] if 
nomtxt.count("freq"): freqs.extend(re.findall(ur"\d+\.\d+\s*MHz")) assert points and floor and ceiling space['points']=points space['type']=type space['floor']=floor space['ceiling']=ceiling space['freqs']=[] space['type']=type space['date']=date space['url']=fetchdata.getrawurl(url,'ee') for freq in freqs: space['freqs'].append((callsign,freq)) spaces.append(space)
def ee_parse_airfields2():
    """Parse Estonian eAIP aerodrome pages (EE-AD-0.6 index + EE-AD-2.x).

    Builds `ads` (aerodrome dicts: icao, name, pos, runways, charts, date,
    url) and `spaces` (CTR/TIZ/FIZ airspace dicts with freqs).
    NOTE(review): `ads`/`spaces` are built but never returned in the
    visible source — presumably returned/consumed outside this chunk.
    """
    ads = []
    spaces = []
    airac_date = get_airac_date()
    print "airac", airac_date
    overview_url = "/%s/html/eAIP/EE-AD-0.6-en-GB.html" % (airac_date, )
    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(overview_url, country='ee')
    parser.feed(data)
    tree = parser.close()
    icaos = []
    # Pass 1: collect ICAO codes (EExx) from the AD index, skipping heliports.
    for cand in tree.xpath(".//h3"):
        txts = alltexts(cand.xpath(".//a"))
        aps = re.findall(r"EE[A-Z]{2}", " ".join(txts))
        if aps:
            icao, = aps
            if alltext(cand).count("HELIPORT"):
                print "Ignore heliport", icao
                continue
            icaos.append(icao)
    # Pass 2: fetch and parse each aerodrome's own AD-2 page.
    for icao in icaos:
        ad = dict(icao=icao)
        url = "/%s/html/eAIP/EE-AD-2.%s-en-GB.html" % (airac_date, icao)
        data, date = fetchdata.getdata(url, country='ee')
        # NOTE(review): the same HTMLParser instance is re-fed per page;
        # lxml allows feed() after close(), but confirm this is intended.
        parser.feed(data)
        tree = parser.close()
        thrs = []  # runway threshold dicts collected for rwy_constructor
        # Aerodrome name: heading of the form "EExx — <name>".
        for h3 in tree.xpath(".//h3"):
            txt = alltext(h3)
            print repr(txt)
            ptrn = ur"\s*%s\s+[—-]\s+(.*)" % (unicode(icao.upper()), )
            m = re.match(ptrn, txt, re.UNICODE)
            if m:
                assert not 'name' in ad
                ad['name'] = m.groups()[0]
        # ARP position: "coordinates and site" row with DDMMSSN DDDMMSSE.
        for tr in tree.xpath(".//tr"):
            txt = alltext(tr)
            m = re.match(r".*coordinates\s*and\s*site.*(\d{6}N\s*\d{7}E).*",
                         txt)
            #print "Matching,",txt,":",m
            if m:
                crds, = m.groups()
                ad['pos'] = mapper.anyparse(crds)
        # CTR/TIZ/FIZ airspace associated with this aerodrome; `space` is
        # filled incrementally from two different rows (lateral + vertical).
        space = dict()
        for table in tree.xpath(".//table"):
            for tr in table.xpath(".//tr"):
                trtxt = alltext(tr)
                if trtxt.count("Designation and lateral limits"):
                    space = dict()
                    coords = tr.getchildren()[2]
                    lines = alltext(coords).split("\n")
                    if lines[0].strip() == 'NIL':
                        continue
                    # First line: "<zone name> CTR|TIZ|FIZ [spill-over]".
                    zname, what, spill = re.match(
                        ur"(.*)\s+(CTR|TIZ|FIZ)(.*)", lines[0]).groups()
                    if spill and spill.strip():
                        rest = [spill] + lines[1:]
                    else:
                        rest = lines[1:]
                    what = what.strip()
                    # Zone name should be contained in the aerodrome name.
                    assert ad['name'].upper().strip().count(
                        zname.upper().strip())
                    assert what in ['FIZ', 'TIZ', 'CTR']
                    space['type'] = what
                    space['points'] = mapper.parse_coord_str("\n".join(rest))
                    space['name'] = zname + " " + what
                    space['date'] = date
                    space['url'] = fetchdata.getrawurl(url, 'ee')
                if trtxt.count("Vertical limits"):
                    vlim = alltext(tr.getchildren()[2])
                    if vlim.strip() == 'NIL':
                        continue
                    space['floor'], space['ceiling'] = vlim.split(" to ")
        #space['freqs']=x
        #hlc=False
        # Chart links: sections whose heading mentions "charts"; map chart
        # titles to parse_landing_chart variants.
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.lower().count("charts"):
                par = h4.getparent()
                for table in par.xpath(".//table"):
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        name,page=\
                            tr.getchildren()
                        nametxt = alltext(name)
                        print "nametxt:", nametxt, "link:"
                        for reg, variant in [
                            (r"Aerodrome.*Chart.*", ""),
                            (r"Landing.*Chart.*", "landing"),
                            (r".*Parking.*Chart.*", "parking"),
                            (r".*Visual.*Approach.*|.*\bVAC\b.*", "vac")
                        ]:
                            if re.match(reg, nametxt):
                                for a in page.xpath(".//a"):
                                    print "linklabel", a.text
                                    print "attrib:", a.attrib
                                    href = a.attrib['href']
                                    print "Bef repl", href
                                    if href.lower().endswith("pdf"):
                                        # Relative graphics path -> AIRAC-
                                        # versioned absolute path.
                                        href = href.replace(
                                            "../../graphics",
                                            "/%s/graphics" % (airac_date, ))
                                        print "href:", href, airac_date
                                        assert href
                                        parse_landing_chart.help_plc(
                                            ad, href, icao, ad['pos'], "ee",
                                            variant=variant)
                                        """arp=ad['pos']
                                        lc=parse_landing_chart.parse_landing_chart(
                                                href,
                                                icao=icao,
                                                arppos=arp,country="ee")
                                        assert lc
                                        if lc:
                                            ad['adcharturl']=lc['url']
                                            ad['adchart']=lc
                                            hlc=True
                                            #chartblobnames.append(lc['blobname'])
                                        """
        #assert hlc
        # Runway physical characteristics: extract threshold coordinates.
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.count("RUNWAY PHYSICAL"):
                par = h4.getparent()
                for table in par.xpath(".//table"):
                    prevnametxt = ""
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        if idx == 0:
                            fc = alltext(tr.getchildren()[0])
                            print "FC", fc
                            if not fc.count("Designations"):
                                break  #skip table
                        if idx < 2:
                            continue  # header rows
                        if len(tr.getchildren()) == 1:
                            continue  # spanning/separator row
                        print "c:", tr.getchildren(), alltexts(
                            tr.getchildren())
                        desig, trubrg, dims, strength, thrcoord, threlev = tr.getchildren(
                        )
                        rwy = re.match(r"(\d{2}[LRC]?)", alltext(desig))
                        altc = alltext(thrcoord)
                        print "Matching", altc
                        print "rwymatch:", alltext(desig)
                        m = re.match(r"\s*(\d+\.?\d*N)[\s\n]*(\d+\.?\d*E).*",
                                     altc, re.DOTALL | re.MULTILINE)
                        if m:
                            lat, lon = m.groups()
                            print "Got latlon", lat, lon
                            thrs.append(
                                dict(pos=mapper.parse_coords(lat, lon),
                                     thr=rwy.groups()[0]))
        # ATS frequencies: 5-column rows (service, callsign, freq, hours,
        # remarks); rows mentioning EMERG may lack a callsign and are skipped.
        space['freqs'] = []
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.count("ATS COMMUNICATION"):
                par = h4.getparent()
                for table in par.xpath(".//table"):
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        print "cs", repr(tr.getchildren()), alltexts(
                            tr.getchildren())
                        print len(tr.getchildren())
                        if len(tr.getchildren()) != 5:
                            if "".join(alltexts(
                                    tr.getchildren())).count(u"EMERG"):
                                continue  #Sometimes emergency freq is listed, and then it is without callsign
                        # Any other non-5-column row will fail to unpack
                        # below — deliberate crash on unexpected layout.
                        service,callsign,frequency,hours,remarks=\
                            tr.getchildren()
                        callsigntxt = alltext(callsign)
                        if idx < 2:
                            # Sanity-check the two header rows.
                            if idx == 0:
                                assert callsigntxt.strip() == "Call sign"
                            if idx == 1:
                                assert callsigntxt.strip() == "2"
                            continue
                        ftext = alltext(frequency)
                        print "matching freq", ftext
                        for freq in re.findall(ur"\b\d{3}\.\d{1,3}", ftext):
                            freqmhz = float(freq)
                            space['freqs'].append(
                                (callsigntxt.strip(), freqmhz))
        # Only keep the airspace if lateral limits were actually parsed.
        if space and 'points' in space:
            assert 'freqs' in space
            assert 'points' in space
            assert 'floor' in space
            assert 'ceiling' in space
            assert 'type' in space
            spaces.append(space)
        if thrs:
            ad['runways'] = rwy_constructor.get_rwys(thrs)
        aip_text_documents.help_parse_doc(ad, url, icao, "ee",
                                          title="General Information",
                                          category="general")
        ad['date'] = date
        ad['url'] = fetchdata.getrawurl(url, 'ee')
        print "AD:", ad
        assert 'pos' in ad
        assert 'name' in ad
        ads.append(ad)
def ee_parse_airfields2():
    """Parse Estonian eAIP aerodrome pages (EE-AD-0.6 index + EE-AD-2.x).

    Duplicate of the earlier ee_parse_airfields2 definition (merge artifact;
    this later definition wins at import time). Builds `ads` (aerodrome
    dicts) and `spaces` (CTR/TIZ/FIZ airspace dicts).
    NOTE(review): results are built but never returned in the visible
    source — presumably returned/consumed outside this chunk.
    """
    ads=[]
    spaces=[]
    airac_date=get_airac_date()
    print "airac",airac_date
    overview_url="/%s/html/eAIP/EE-AD-0.6-en-GB.html"%(airac_date,)
    parser=lxml.html.HTMLParser()
    data,date=fetchdata.getdata(overview_url,country='ee')
    parser.feed(data)
    tree=parser.close()
    icaos=[]
    # Collect ICAO codes (EExx) from the index, skipping heliports.
    for cand in tree.xpath(".//h3"):
        txts=alltexts(cand.xpath(".//a"))
        aps=re.findall(r"EE[A-Z]{2}"," ".join(txts))
        if aps:
            icao,=aps
            if alltext(cand).count("HELIPORT"):
                print "Ignore heliport",icao
                continue
            icaos.append(icao)
    for icao in icaos:
        ad=dict(icao=icao)
        url="/%s/html/eAIP/EE-AD-2.%s-en-GB.html"%(airac_date,icao)
        data,date=fetchdata.getdata(url,country='ee')
        # NOTE(review): the parser instance is re-fed for every page after
        # close(); lxml permits this, but confirm it is intended.
        parser.feed(data)
        tree=parser.close()
        thrs=[]
        # Aerodrome name from a heading of the form "EExx — <name>".
        for h3 in tree.xpath(".//h3"):
            txt=alltext(h3)
            print repr(txt)
            ptrn=ur"\s*%s\s+[—-]\s+(.*)"%(unicode(icao.upper()),)
            m=re.match(ptrn,txt,re.UNICODE)
            if m:
                assert not 'name' in ad
                ad['name']=m.groups()[0]
        # ARP position from the "coordinates and site" row.
        for tr in tree.xpath(".//tr"):
            txt=alltext(tr)
            m=re.match(r".*coordinates\s*and\s*site.*(\d{6}N\s*\d{7}E).*",txt)
            #print "Matching,",txt,":",m
            if m:
                crds,=m.groups()
                ad['pos']=mapper.anyparse(crds)
        # CTR/TIZ/FIZ airspace, filled incrementally from two row types.
        space=dict()
        for table in tree.xpath(".//table"):
            for tr in table.xpath(".//tr"):
                trtxt=alltext(tr)
                if trtxt.count("Designation and lateral limits"):
                    space=dict()
                    coords=tr.getchildren()[2]
                    lines=alltext(coords).split("\n")
                    if lines[0].strip()=='NIL':
                        continue
                    # "<zone name> CTR|TIZ|FIZ [spill-over text]"
                    zname,what,spill=re.match(ur"(.*)\s+(CTR|TIZ|FIZ)(.*)",lines[0]).groups()
                    if spill and spill.strip():
                        rest=[spill]+lines[1:]
                    else:
                        rest=lines[1:]
                    what=what.strip()
                    assert ad['name'].upper().strip().count(zname.upper().strip())
                    assert what in ['FIZ','TIZ','CTR']
                    space['type']=what
                    space['points']=mapper.parse_coord_str("\n".join(rest))
                    space['name']=zname+" "+what
                    space['date']=date
                    space['url']=fetchdata.getrawurl(url,'ee')
                if trtxt.count("Vertical limits"):
                    vlim=alltext(tr.getchildren()[2])
                    if vlim.strip()=='NIL':
                        continue
                    space['floor'],space['ceiling']=vlim.split(" to ")
        #space['freqs']=x
        #hlc=False
        # Chart links: match chart titles to parse_landing_chart variants.
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.lower().count("charts"):
                par=h4.getparent()
                for table in par.xpath(".//table"):
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        name,page=\
                            tr.getchildren()
                        nametxt=alltext(name)
                        print "nametxt:",nametxt,"link:"
                        for reg,variant in [
                            (r"Aerodrome.*Chart.*","") ,
                            (r"Landing.*Chart.*","landing"),
                            (r".*Parking.*Chart.*","parking"),
                            (r".*Visual.*Approach.*|.*\bVAC\b.*","vac")
                            ]:
                            if re.match(reg,nametxt):
                                for a in page.xpath(".//a"):
                                    print "linklabel",a.text
                                    print "attrib:",a.attrib
                                    href=a.attrib['href']
                                    print "Bef repl",href
                                    if href.lower().endswith("pdf"):
                                        # Rewrite relative graphics path to the
                                        # AIRAC-versioned absolute path.
                                        href=href.replace("../../graphics","/%s/graphics"%(airac_date,))
                                        print "href:",href,airac_date
                                        assert href
                                        parse_landing_chart.help_plc(ad,href,
                                                                     icao,ad['pos'],"ee",variant=variant)
                                        """arp=ad['pos']
                                        lc=parse_landing_chart.parse_landing_chart(
                                                href,
                                                icao=icao,
                                                arppos=arp,country="ee")
                                        assert lc
                                        if lc:
                                            ad['adcharturl']=lc['url']
                                            ad['adchart']=lc
                                            hlc=True
                                            #chartblobnames.append(lc['blobname'])
                                        """
        #assert hlc
        # Runway physical characteristics: threshold coordinates per runway.
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.count("RUNWAY PHYSICAL"):
                par=h4.getparent()
                for table in par.xpath(".//table"):
                    prevnametxt=""
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        if idx==0:
                            fc=alltext(tr.getchildren()[0])
                            print "FC",fc
                            if not fc.count("Designations"):
                                break #skip table
                        if idx<2:continue
                        if len(tr.getchildren())==1:continue
                        print "c:",tr.getchildren(),alltexts(tr.getchildren())
                        desig,trubrg,dims,strength,thrcoord,threlev=tr.getchildren()
                        rwy=re.match(r"(\d{2}[LRC]?)",alltext(desig))
                        altc=alltext(thrcoord)
                        print "Matching",altc
                        print "rwymatch:",alltext(desig)
                        m=re.match(r"\s*(\d+\.?\d*N)[\s\n]*(\d+\.?\d*E).*",altc,re.DOTALL|re.MULTILINE)
                        if m:
                            lat,lon=m.groups()
                            print "Got latlon",lat,lon
                            thrs.append(dict(pos=mapper.parse_coords(lat,lon),thr=rwy.groups()[0]))
        # ATS frequencies: 5-column rows; EMERG rows may lack a callsign and
        # are skipped; any other malformed row crashes on unpacking below.
        space['freqs']=[]
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.count("ATS COMMUNICATION"):
                par=h4.getparent()
                for table in par.xpath(".//table"):
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        print "cs",repr(tr.getchildren()),alltexts(tr.getchildren())
                        print len(tr.getchildren())
                        if len(tr.getchildren())!=5:
                            if "".join(alltexts(tr.getchildren())).count(u"EMERG"):
                                continue #Sometimes emergency freq is listed, and then it is without callsign
                        service,callsign,frequency,hours,remarks=\
                            tr.getchildren()
                        callsigntxt=alltext(callsign)
                        if idx<2:
                            # Sanity-check the two header rows.
                            if idx==0:
                                assert callsigntxt.strip()=="Call sign"
                            if idx==1:
                                assert callsigntxt.strip()=="2"
                            continue
                        ftext=alltext(frequency)
                        print "matching freq",ftext
                        for freq in re.findall(ur"\b\d{3}\.\d{1,3}",ftext):
                            freqmhz=float(freq)
                            space['freqs'].append((callsigntxt.strip(),freqmhz))
        # Only keep the airspace if lateral limits were actually parsed.
        if space and 'points' in space:
            assert 'freqs' in space
            assert 'points' in space
            assert 'floor' in space
            assert 'ceiling' in space
            assert 'type' in space
            spaces.append(space)
        if thrs:
            ad['runways']=rwy_constructor.get_rwys(thrs)
        aip_text_documents.help_parse_doc(ad,url,
                                          icao,"ee",title="General Information",category="general")
        ad['date']=date
        ad['url']=fetchdata.getrawurl(url,'ee')
        print "AD:",ad
        assert 'pos' in ad
        assert 'name' in ad
        ads.append(ad)
def ee_parse_gen_r2(url):
    """Parse an Estonian eAIP restricted-area (R-area) table page.

    Fetches `url`, walks every table row, and returns a list of airspace
    dicts with type 'R' (name, points, floor, ceiling, freqs, date, url).
    Areas whose floor is at or above 9500 ft are skipped.
    """
    spaces = []
    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(url, country='ee')
    parser.feed(data)
    tree = parser.close()
    print "Parsed tree"
    for tab in tree.xpath(".//table"):
        print "Found table"
        for idx, cand in enumerate(tab.xpath(".//tr")):
            if len(cand.getchildren()) < 3:
                continue
            space = dict()
            #print list(cand.getchildren())
            # Columns: identification/lateral limits, vertical limits, remark.
            what, vert, remark = list(cand.getchildren())[0:3]
            # Normalize en-dashes and non-breaking spaces before parsing.
            whattxt = alltext(what).replace(u"–", "-").replace(u"\xa0", " ")
            verttxt = alltext(vert)
            # Repeatedly strip parenthesized asides (handles multiple
            # occurrences; each re.sub pass removes all non-nested groups).
            while True:
                w = re.sub(ur"\(.*?\)", "", whattxt)
                if w != whattxt:
                    whattxt = w
                    continue
                break
            #print idx,whattxt
            if idx < 3:
                # Header rows: sanity-check the expected layout, then skip.
                if idx == 1:
                    assert (whattxt.count("Identification")
                            or whattxt.count("ateral limits"))
                if idx == 2:
                    assert whattxt.strip() == "1"
                continue
            verttxt = verttxt.replace(u"\xa0", u" ")
            vertlines = [x for x in verttxt.split("\n") if x.strip()]
            if len(vertlines) == 1:
                # Ceiling and floor on one line; fall back to splitting on
                # spaces.
                vertlines = [x for x in verttxt.split(" ") if x.strip()]
            print "Verlintes:", repr(vertlines)
            #print "wha------------------------ t",whattxt
            space['ceiling'], space['floor'] = vertlines[:2]
            # Parse ceiling only to validate it; keep the raw string.
            mapper.parse_elev(space['ceiling'])
            ifloor = mapper.parse_elev(space['floor'])
            # Ignore areas that start at/above 9500 ft (not relevant here).
            if ifloor >= 9500:
                continue
            lines = whattxt.split("\n")
            out = []
            merged = ""
            # Re-join coordinate description lines that the HTML broke up
            # mid-sentence, and terminate each segment with " -" so the
            # coordinate parser sees a delimited list.
            for line in lines[1:]:
                line = line.strip().replace(u"–", "-")
                if line == "":
                    continue
                if line.endswith("point"):
                    out.append(line + " ")
                    continue
                if line.endswith("ircle with radius of") or line.endswith(
                        ",") or line.endswith("on") or line.endswith("radius"):
                    # Line clearly continues on the next one: buffer it.
                    merged = " ".join([merged, line])
                    print "<---Merged:", merged
                    continue
                if merged:
                    line = " ".join([merged, line])
                merged = ""
                if not line.endswith("-"):
                    line = line + " -"
                out.append(line + "\n")
            space['name'] = lines[0].strip()
            w = "".join(out)
            print "Parsing:", w
            if space['name'].startswith('EER1 '):
                # EER1 follows the FIR border; use the hard-coded texts
                # attached to ee_parse_tma2 and parse within FIR context.
                w = ee_parse_tma2.eer1txt
                fir = mapper.parse_coord_str(ee_parse_tma2.firtxt,
                                             context='estonia')
                fir_context = [fir]
                space['points'] = mapper.parse_coord_str(
                    w, fir_context=fir_context)
            else:
                space['points'] = mapper.parse_coord_str(w, context='estonia')
            space['type'] = 'R'
            space['date'] = date
            space['freqs'] = []
            space['url'] = fetchdata.getrawurl(url, 'ee')
            spaces.append(space)
    return spaces
def ee_parse_tma2(): spaces = [] airac_date = get_airac_date() url = "/%s/html/eAIP/EE-ENR-2.1-en-GB.html" % (airac_date, ) parser = lxml.html.HTMLParser() data, date = fetchdata.getdata(url, country='ee') parser.feed(data) tree = parser.close() icaos = [] def nested(tab): if tab == None: return False if tab.getparent() is None: return False #print dir(tab) if tab.tag == 'table': return True return nested(tab.getparent()) for tab in tree.xpath(".//table"): print "table alltext:", alltext(tab) if nested(tab.getparent()): continue firsttr = tab.xpath(".//tr")[0] ntext = alltext(firsttr) print "firsttr", firsttr print "ntext", ntext if re.match(ur".*FIR\s*/\s*CTA.*", ntext): print "Matches Tallin FIR" name = 'TALLIN FIR' points = mapper.parse_coord_str(firtxt, context='estonia') floor, ceiling = "GND", "FL195" space = {} space['name'] = name space['points'] = points space['floor'] = floor space['ceiling'] = ceiling space['freqs'] = [] space['icao'] = 'EETT' space['type'] = 'FIR' space['date'] = date space['url'] = fetchdata.getrawurl(url, 'ee') spaces.append(space) continue else: name = ntext.strip() space = dict(name=name) print "Name", name assert space['name'].count("TMA") \ or space['name'].count("FIR") if space['name'].count("FIR"): type = 'FIR' else: type = "TMA" freqs = [] points = None floor = None ceiling = None for cand in tab.xpath(".//tr"): if len(cand.getchildren()) != 2: continue nom, what = cand.getchildren() whattxt = alltext(what) nomtxt = alltext(nom) print "nomtxt", nomtxt, "space name", space['name'] if nomtxt.count("Lateral limits"): if space['name'].count("TALLINN TMA 2"): points = mapper.parse_coord_str(""" A circle with radius 20 NM centred on 592448N 0244957E """) else: whattxt = whattxt.replace( "then along the territory dividing line between Estonia and Russia to", "- Along the common Estonian/X state boundary to ") print "Fixed up", whattxt points = mapper.parse_coord_str(whattxt, context='estonia') if nomtxt.count("Vertical limits"): 
floor, ceiling = whattxt.split(" to ") if nomtxt.count("Call sign"): callsign = whattxt.split("\n")[0] if nomtxt.count("freq"): freqs.extend(re.findall(ur"\d+\.\d+\s*MHz")) assert points and floor and ceiling space['points'] = points space['type'] = type space['floor'] = floor space['ceiling'] = ceiling space['freqs'] = [] space['type'] = type space['date'] = date space['url'] = fetchdata.getrawurl(url, 'ee') for freq in freqs: space['freqs'].append((callsign, freq)) spaces.append(space)
def ee_parse_gen_r2(url):
    """Parse an Estonian eAIP restricted-area (R-area) table page.

    Duplicate of the earlier ee_parse_gen_r2 definition (merge artifact;
    this later definition wins at import time). Returns a list of type-'R'
    airspace dicts; areas with floor at/above 9500 ft are skipped.
    """
    spaces=[]
    parser=lxml.html.HTMLParser()
    data,date=fetchdata.getdata(url,country='ee')
    parser.feed(data)
    tree=parser.close()
    print "Parsed tree"
    for tab in tree.xpath(".//table"):
        print "Found table"
        for idx,cand in enumerate(tab.xpath(".//tr")):
            if len(cand.getchildren())<3:
                continue
            space=dict()
            #print list(cand.getchildren())
            # Columns: identification/lateral limits, vertical limits, remark.
            what,vert,remark=list(cand.getchildren())[0:3]
            # Normalize en-dashes and non-breaking spaces before parsing.
            whattxt=alltext(what).replace(u"–","-").replace(u"\xa0"," ")
            verttxt=alltext(vert)
            # Repeatedly strip parenthesized asides until stable.
            while True:
                w=re.sub(ur"\(.*?\)","",whattxt)
                if w!=whattxt:
                    whattxt=w
                    continue
                break
            #print idx,whattxt
            if idx<3:
                # Header rows: sanity-check the expected layout, then skip.
                if idx==1:
                    assert (whattxt.count("Identification") or
                            whattxt.count("ateral limits"))
                if idx==2:
                    assert whattxt.strip()=="1"
                continue
            verttxt=verttxt.replace(u"\xa0",u" ")
            vertlines=[x for x in verttxt.split("\n") if x.strip()]
            if len(vertlines)==1:
                # Ceiling and floor on a single line; split on spaces instead.
                vertlines=[x for x in verttxt.split(" ") if x.strip()]
            print "Verlintes:",repr(vertlines)
            #print "wha------------------------ t",whattxt
            space['ceiling'],space['floor']=vertlines[:2]
            # Parse ceiling only to validate it; the raw string is kept.
            mapper.parse_elev(space['ceiling'])
            ifloor=mapper.parse_elev(space['floor'])
            # Ignore areas starting at/above 9500 ft.
            if ifloor>=9500: continue
            lines=whattxt.split("\n")
            out=[]
            merged=""
            # Re-join coordinate description lines broken mid-sentence by the
            # HTML, terminating each segment with " -" for the parser.
            for line in lines[1:]:
                line=line.strip().replace(u"–","-")
                if line=="":continue
                if line.endswith("point"):
                    out.append(line+" ")
                    continue
                if line.endswith("ircle with radius of") or line.endswith(",") or line.endswith("on") or line.endswith("radius"):
                    # Line clearly continues on the next one: buffer it.
                    merged=" ".join([merged,line])
                    print "<---Merged:",merged
                    continue
                if merged:
                    line=" ".join([merged,line])
                merged=""
                if not line.endswith("-"):
                    line=line+" -"
                out.append(line+"\n")
            space['name']=lines[0].strip()
            w="".join(out)
            print "Parsing:",w
            if space['name'].startswith('EER1 '):
                # EER1 follows the FIR border; use the hard-coded texts
                # attached to ee_parse_tma2 and parse within FIR context.
                w=ee_parse_tma2.eer1txt
                fir=mapper.parse_coord_str(ee_parse_tma2.firtxt,context='estonia')
                fir_context=[fir]
                space['points']=mapper.parse_coord_str(w,fir_context=fir_context)
            else:
                space['points']=mapper.parse_coord_str(w,context='estonia')
            space['type']='R'
            space['date']=date
            space['freqs']=[]
            space['url']=fetchdata.getrawurl(url,'ee')
            spaces.append(space)
    return spaces