assert name assert not ctrname in seen seen.add(ctrname) ad=dict( icao=icao, name=name, elev=elev, date=date, runways=rwy_constructor.get_rwys(thrs), pos=pos) if adcharturl: ad['adcharturl']=adcharturl if 'adcharts' in addummy: ad['adcharts']=addummy['adcharts'] aip_text_documents.help_parse_doc(ad,url, icao,"ev",title="General Information",category="general") ads.append(ad) spaces.append(dict( name=ctrname, points=mapper.parse_coord_str(ctrarea), ceiling=ceiling, type=type_, floor=floor, freqs=freqs, date=date, url=url )) spilve=dict( icao="EVRS", name="Spilve",
def fi_parse_airfield(icao=None): spaces=[] ad=dict() assert icao!=None ad['icao']=icao sigpoints=[] #https://ais.fi/ais/eaip/pdf/aerodromes/EF_AD_2_EFET_EN.pdf #https://ais.fi/ais/eaip/aipcharts/efet/EF_AD_2_EFET_VAC.pdf #vacp=parse.Parser("/ais/eaip/aipcharts/%s/EF_AD_2_%s_VAC.pdf"%(icao.lower(),icao),lambda x: x,country="fi") def remove_italics(x): return x.replace("<i>","").replace("</i>","") p=parse.Parser("/ais/eaip/pdf/aerodromes/EF_AD_2_%s_EN.pdf"%(icao,),remove_italics,country="fi") #The following doesn't actually work, since finnish VAC are bitmaps!!! :-( if 0: vacpage=vacp.parse_page_to_items(0) repp=vacpage.get_by_regex("\s*REPORTING\s*POINTS\s*") assert len(repp)>0 for item in repp: lines=iter(page.get_lines(page.get_partially_in_rect(item.x1,item.y2+0.1,100,100))) for line in lines: uprint("Looking for reporting points:%s"%(line,)) name,lat,lon=re.match(ur"([A-ZÅÄÖ\s ]{3,})\s*([ \d]+N)\s*([ \d]+E).*",line) sigpoints.append(dict( name=icao+" "+name.strip(), kind="reporting", pos=mapper.parse_coords(lat.replace(" ",""),lon.replace(" ","")))) page=p.parse_page_to_items(0) nameregex=ur"%s\s+-\s+([A-ZÅÄÖ\- ]{3,})"%(icao,) for item in page.get_by_regex(nameregex): #print "fontsize:",item.fontsize assert item.fontsize>=14 ad['name']=re.match(nameregex,item.text).groups()[0].strip() break for item in page.get_by_regex(ur".*ELEV\s*/\s*REF.*"): lines=page.get_lines(page.get_partially_in_rect(0,item.y1+0.1,100,item.y2-0.1)) for line in lines: print "Line:",line ft,=re.match(".*ELEV.*([\d\.]+)\s*FT.*",line).groups() assert not 'elev' in ad ad['elev']=float(ft) for item in page.get_by_regex(ur"Mittapisteen.*sijainti"): lines=page.get_lines(page.get_partially_in_rect(item.x1,item.y1,100,item.y2)) for line in lines: for crd in mapper.parsecoords(line): assert not ('pos' in ad) ad['pos']=crd parse_landing_chart.help_plc(ad, "/ais/eaip/aipcharts/%s/EF_AD_2_%s_ADC.pdf"%(icao.lower(),icao.upper()), icao,ad['pos'],country='fi' ) parse_landing_chart.help_plc(ad, 
"/ais/eaip/aipcharts/%s/EF_AD_2_%s_VAC.pdf"%(icao.lower(),icao.upper()), icao,ad['pos'],country='fi',variant='VAC' ) parse_landing_chart.help_plc(ad, "/ais/eaip/aipcharts/%s/EF_AD_2_%s_LDG.pdf"%(icao.lower(),icao.upper()), icao,ad['pos'],country='fi',variant='landing' ) parse_landing_chart.help_plc(ad, "/ais/eaip/aipcharts/%s/EF_AD_2_%s_APDC.pdf"%(icao.lower(),icao.upper()), icao,ad['pos'],country='fi',variant='parking' ) aip_text_documents.help_parse_doc(ad,"/ais/eaip/pdf/aerodromes/EF_AD_2_%s_EN.pdf"%(icao.upper(),), icao,"fi",title="General Information",category="general") ad['runways']=[] thrs=[] freqs=[] for pagenr in xrange(p.get_num_pages()): page=p.parse_page_to_items(pagenr) if page==None: continue for item in page.get_by_regex("\s*RUNWAY\s*PHYSICAL\s*CHARACTERISTICS\s*"): lines=page.get_lines(page.get_partially_in_rect(0,item.y2+0.1,100,100)) for line in lines: if re.match(ur"AD\s+2.13",line): break m=re.match(ur".*?(RWY END)?\s*\*?(\d{6}\.\d+N)\s*(\d{6,7}\.\d+E).*",line) if not m:continue rwyend,lat,lon=m.groups() rwytxts=page.get_lines(page.get_partially_in_rect(0,line.y1,12,line.y2)) print "Rwytxts:",rwytxts rwytxt,=rwytxts uprint("rwytext:",rwytxt) rwy,=re.match(ur"\s*(\d{2}[LRCM]?)\s*[\d.]*\s*",rwytxt).groups() have_thr=False for thr in thrs: if thr['thr']==rwy: have_thr=True if rwyend!=None and have_thr: continue thrs.append(dict(pos=mapper.parse_coords(lat,lon),thr=rwy)) for item in page.get_by_regex("ATS AIRSPACE"): lines=iter(page.get_lines(page.get_partially_in_rect(0,item.y2+0.1,100,100))) spaces=[] line=lines.next() while True: while line.strip()=="": line=lines.next() print "Read line:",line if line.count("Vertical limits"): break m=re.match(ur".*?/\s+Designation and lateral limits\s*(.*\b(?:CTR|FIZ)\b.*?)\s*:?\s*$",line) if not m: m=re.match(ur"\s*(.*\b(?:CTR|FIZ)\b.*?)\s*:",line) #print "Second try:",m spacename,=m.groups() #print "Got spacename:",spacename assert spacename.strip()!="" coords=[] while True: line=lines.next() print 
"Further:",line if line.count("Vertical limits"): print "Breaking" break if not re.search(ur"[\d ]+N\s*[\d ]+E",line) and \ not re.search(ur"circle|cent[red]{1,5}|pitkin|point|equal\s*to",line): print "Breaking" break coords.append(line) areaspec="".join(coords) def fixup(m): lat,lon=m.groups() return lat.replace(" ","")+" "+lon.replace(" ","") areaspec=re.sub(ur"([\d ]+N)\s*([\d ]+E)",fixup,areaspec) areaspec=re.sub(ur"\(.*/\s*equal\s*to\s*Malmi\s*CTR\s*lateral\s*limits\)","",areaspec) #print "Fixed areaspec",areaspec #if icao=="EFKS": # areaspec=areaspec.replace("6615 28N","661528N") #Error! REstriction areas! spaces.append(dict( name=spacename, type="CTR", points=mapper.parse_coord_str(areaspec))) if line.count("Vertical limits"): #print "Breaking" break while not line.count("Vertical limits"): line=lines.next() #print "Matching veritcal limits--------------------------------" oldspaces=spaces spaces=[] for space in oldspaces: if space['name'].count("/"): a,b=space['name'].split("/") spaces.append(dict(space,name=a.strip())) spaces.append(dict(space,name=b.strip())) else: spaces.append(space) missing=set([space['name'] for space in spaces]) while True: for space in spaces: for it in xrange(3): cand=space['name'] if it==1: if cand.count("CTR"): cand="CTR" if cand.count("FIZ"): cand="FIZ" if it==2: if cand.count("CTR"): cand=r"CTR\s*/[\sA-Z]+" if cand.count("FIZ UPPER"): cand="FIZ UPPER" if cand.count("FIZ LOWER"): cand="FIZ LOWER" m=re.match(ur".*%s\s*:([^,:-]*)\s*-\s*([^,:-]*)"%(cand,),line) print "Matching ",cand," to ",line,"missing:",missing,m if m: break if len(spaces)==1 and not m: m=re.match(ur".*Vertical limits\s*(.*)\s*-\s*(.*)",line) if m: print "*****MATCH!!:::",m.groups() for lim in m.groups(): assert lim.count(",")==0 space['floor'],space['ceiling']=m.groups() missing.remove(space['name']) #print "Missing:" if len(missing)==0: break if len(missing)==0: break #print "Still missing:",missing line=lines.next() print "Parse f o n page",pagenr for item2 
in page.get_by_regex(ur".*ATS\s*COMMUNICATION\s*FACILITIES.*"): lines=page.get_lines(page.get_partially_in_rect(0,item2.y2+0.1,100,100)) for line in lines: if line.count("RADIO NAVIGATION AND LANDING AIDS"): break print "Comm line:",line twr=re.match(ur"TWR.*(\d{3}\.\d{3})\b.*",line) if twr: freqs.append(('TWR',float(twr.groups()[0]))) atis=re.match(ur"ATIS.*(\d{3}\.\d{3})",line) if atis: freqs.append(('ATIS',float(atis.groups()[0])))
def extract_airfields(filtericao=lambda x:True,purge=True): #print getxml("/AIP/AD/AD 1/ES_AD_1_1_en.pdf") ads=[] p=Parser("/AIP/AD/AD 1/ES_AD_1_1_en.pdf") points=dict() startpage=None for pagenr in xrange(p.get_num_pages()): page=p.parse_page_to_items(pagenr) if page.count("Aerodrome directory"): startpage=pagenr break if startpage==None: raise Exception("Couldn't find aerodrome directory in file") #print "Startpage: %d"%(startpage,) #nochartf=open("nochart.txt","w") for pagenr in xrange(startpage,p.get_num_pages()): row_y=[] page=p.parse_page_to_items(pagenr) allines=[x for x in (page.get_lines(page.get_partially_in_rect(0,0,15,100))) if x.strip()] for item,next in zip(allines,allines[1:]+[""]): #print "item:",item m=re.match(ur"^\s*[A-ZÅÄÖ]{3,}(?:/.*)?\b.*",item) if m: #print "Candidate, next is:",next if re.match(r"^\s*[A-Z]{4}\b.*",next): #print "Matched:",item #print "y1:",item.y1 row_y.append(item.y1) for y1,y2 in zip(row_y,row_y[1:]+[100.0]): #print "Extacting from y-range: %f-%f"%(y1,y2) items=list(page.get_partially_in_rect(0,y1-0.25,5.0,y2+0.25,ysort=True)) if len(items)>=2: #print "Extract items",items ad=dict(name=unicode(items[0].text).strip(), icao=unicode(items[1].text).strip() ) #print "Icao:",ad['icao'] assert re.match(r"[A-Z]{4}",ad['icao']) if not filtericao(ad): continue if len(items)>=3: #print "Coord?:",items[2].text m=re.match(r".*(\d{6}N)\s*(\d{7}E).*",items[2].text) if m: lat,lon=m.groups() ad['pos']=parse_coords(lat,lon) #print "Items3:",items[3:] elev=re.findall(r"(\d{1,5})\s*ft"," ".join(t.text for t in items[3:])) #print "Elev:",elev assert len(elev)==1 ad['elev']=int(elev[0]) ads.append(ad) big_ad=set() for ad in ads: if not ad.has_key('pos'): big_ad.add(ad['icao']) for ad in ads: icao=ad['icao'] if icao in big_ad: if icao in ['ESIB','ESNY','ESCM','ESPE']: continue try: p=Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_6_1_en.pdf"%(icao,icao)) except: p=Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_6-1_en.pdf"%(icao,icao)) ad['aipvacurl']=p.get_url() for 
pagenr in xrange(p.get_num_pages()): page=p.parse_page_to_items(pagenr) """ for altline in exitlines: m=re.match(r"(\w+)\s+(\d+N)\s*(\d+E.*)",altline) if not m: continue name,lat,lon=m.groups() try: coord=parse_coords(lat,lon) except Exception: continue points.append(dict(name=name,pos=coord)) """ for kind in xrange(2): if kind==0: hits=page.get_by_regex(r"H[Oo][Ll][Dd][Ii][Nn][Gg]") kind="holding point" if kind==1: hits=page.get_by_regex(r"[Ee]ntry.*[Ee]xit.*point") kind="entry/exit point" if len(hits)==0: continue for holdingheading in hits: items=sorted(page.get_partially_in_rect(holdingheading.x1+2.0,holdingheading.y2+0.1,holdingheading.x1+0.5,100), key=lambda x:x.y1) items=[x for x in items if not x.text.startswith(" ")] #print "Holding items:",items for idx,item in enumerate(items): print "Holding item",item y1=item.y1 if idx==len(items)-1: y2=100 else: y2=items[idx+1].y1 items2=[x for x in page.get_partially_in_rect(item.x1+1,y1+0.3,item.x1+40,y2-0.1) if x.x1>=item.x1-0.25 and x.y1>=y1-0.05 and x.y1<y2-0.05] s=(" ".join(page.get_lines(items2))).strip() print "Holding lines:",repr(page.get_lines(items2)) #if s.startswith("ft Left/3"): #Special case for ESOK # s,=re.match("ft Left/3.*?([A-Z]{4,}.*)",s).groups() #m=re.match("ft Left/\d+.*?([A-Z]{4,}.*)",s) #if m: # s,=m.groups() if s.startswith("LjUNG"): #Really strange problem with ESCF s=s[0]+"J"+s[2:] if s.lower().startswith("holding"): sl=s.split(" ",1) if len(sl)>1: s=sl[1] s=s.strip() if kind=="entry/exit point" and s.startswith("HOLDING"): continue #reached HOLDING-part of VAC #Check for other headings #Fixup strange formatting of points in some holding items: (whitespace between coord and 'E') s=re.sub(ur"(\d+)\s*(N)\s*(\d+)\s*(E)",lambda x:"".join(x.groups()),s) m=re.match(r"([A-Z]{2,}).*?(\d+N)\s*(\d+E).*",s) if not m: m=re.match(r".*?(\d+N)\s*(\d+E).*",s) if not m: continue assert m lat,lon=m.groups() #skavsta if icao=="ESKN": if s.startswith(u"Hold north of T"): name="NORTH" elif s.startswith(u"Hold 
south of B"): name="SOUTH" else: assert 0 #add more specials here else: continue else: name,lat,lon=m.groups() try: coord=parse_coords(lat,lon) except Exception: print "Couldn't parse:",lat,lon continue #print name,lat,lon,mapper.format_lfv(*mapper.from_str(coord)) if name.count("REMARK") or len(name)<=2: print "Suspicious name: ",name #sys.exit(1) continue points[icao+' '+name]=dict(name=icao+' '+name,icao=icao,pos=coord,kind=kind) #for point in points.items(): # print point #sys.exit(1) def fixhex11(s): out=[] for c in s: i=ord(c) if i>=0x20: out.append(c) continue if i in [0x9,0xa,0xd]: out.append(c) continue out.append(' ') return "".join(out) for ad in ads: icao=ad['icao'] if icao in big_ad: #print "Parsing ",icao p=Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_en.pdf"%(icao,icao),loadhook=fixhex11) ad['aiptexturl']=p.get_url() firstpage=p.parse_page_to_items(0) te="\n".join(firstpage.get_all_lines()) #print te coords=re.findall(r"ARP.*(\d{6}N)\s*(\d{7}E)",te) if len(coords)>1: raise Exception("First page of airport info (%s) does not contain exactly ONE set of coordinates"%(icao,)) if len(coords)==0: print "Couldn't find coords for ",icao #print "Coords:",coords ad['pos']=parse_coords(*coords[0]) elev=re.findall(r"Elevation.*?(\d{1,5})\s*ft",te,re.DOTALL) if len(elev)>1: raise Exception("First page of airport info (%s) does not contain exactly ONE elevation in ft"%(icao,)) if len(elev)==0: print "Couldn't find elev for ",icao ad['elev']=int(elev[0]) freqs=[] found=False thrs=[] #uprint("-------------------------------------") for pagenr in xrange(p.get_num_pages()): page=p.parse_page_to_items(pagenr) #uprint("Looking on page %d"%(pagenr,)) if 0: #opening hours are no longer stored in a separate document for any airports. No need to detect which any more (since none are). 
for item in page.get_by_regex(".*OPERATIONAL HOURS.*"): lines=page.get_lines(page.get_partially_in_rect(0,item.y2+0.1,100,100)) for line in lines: things=["ATS","Fuelling","Operating"] if not line.count("AIP SUP"): continue for thing in things: if line.count(thing): ad['aipsup']=True for item in page.get_by_regex(".*\s*RUNWAY\s*PHYSICAL\s*CHARACTERISTICS\s*.*"): #uprint("Physical char on page") lines=page.get_lines(page.get_partially_in_rect(0,item.y2+0.1,100,100)) seen_end_rwy_text=False for line,nextline in izip(lines,lines[1:]+[None]): #uprint("MAtching: <%s>"%(line,)) if re.match(ur"AD\s+2.13",line): break if line.count("Slope of"): break if line.lower().count("end rwy:"): seen_end_rwy_text=True if line.lower().count("bgn rwy:"): seen_end_rwy_text=True m=re.match(ur".*(\d{6}\.\d+)[\s\(\)\*]*(N).*",line) if not m:continue m2=re.match(ur".*(\d{6,7}\.\d+)\s*[\s\(\)\*]*(E).*",nextline) if not m2:continue latd,n=m.groups() lond,e=m2.groups() assert n=="N" assert e=="E" lat=latd+n lon=lond+e rwytxts=page.get_lines(page.get_partially_in_rect(0,line.y1+0.05,12,nextline.y2-0.05)) uprint("Rwytxts:",rwytxts) rwy=None for rwytxt in rwytxts: #uprint("lat,lon:%s,%s"%(lat,lon)) #uprint("rwytext:",rwytxt) m=re.match(ur"\s*(\d{2}[LRCM]?)\b.*",rwytxt) if m: assert rwy==None rwy=m.groups()[0] if rwy==None and seen_end_rwy_text: continue print "Cur airport:",icao already=False assert rwy!=None seen_end_rwy_text=False for thr in thrs: if thr['thr']==rwy: raise Exception("Same runway twice on airfield:"+icao) thrs.append(dict(pos=mapper.parse_coords(lat,lon),thr=rwy)) assert len(thrs)>=2 for pagenr in xrange(0,p.get_num_pages()): page=p.parse_page_to_items(pagenr) matches=page.get_by_regex(r".*ATS\s+COMMUNICATION\s+FACILITIES.*") #print "Matches of ATS COMMUNICATION FACILITIES on page %d: %s"%(pagenr,matches) if len(matches)>0: commitem=matches[0] curname=None callsign=page.get_by_regex_in_rect(ur"Call\s*sign",0,commitem.y1,100,commitem.y2+8)[0] for idx,item in 
enumerate(page.get_lines(page.get_partially_in_rect(callsign.x1-0.5,commitem.y1,100,100),fudge=0.3,order_fudge=15)): if item.strip()=="": curname=None if re.match(".*RADIO\s+NAVIGATION\s+AND\s+LANDING\s+AIDS.*",item): break #print "Matching:",item m=re.match(r"(.*?)\s*(\d{3}\.\d{1,3})\s*MHz.*",item) #print "MHZ-match:",m if not m: continue #print "MHZ-match:",m.groups() who,sfreq=m.groups() freq=float(sfreq) if abs(freq-121.5)<1e-4: if who.strip(): curname=who continue #Ignore emergency frequency, it is understood if not who.strip(): if curname==None: continue else: curname=who freqs.append((curname.strip().rstrip("/"),freq)) for pagenr in xrange(0,p.get_num_pages()): page=p.parse_page_to_items(pagenr) matches=page.get_by_regex(r".*ATS\s*AIRSPACE.*") #print "Matches of ATS_AIRSPACE on page %d: %s"%(pagenr,matches) if len(matches)>0: heading=matches[0] desigitem,=page.get_by_regex("Designation and lateral limits") vertitem,=page.get_by_regex("Vertical limits") airspaceclass,=page.get_by_regex("Airspace classification") lastname=None subspacelines=dict() subspacealts=dict() for idx,item in enumerate(page.get_lines(page.get_partially_in_rect(desigitem.x2+1,desigitem.y1,100,vertitem.y1-1))): if item.count("ATS airspace not established"): assert idx==0 break if item.strip()=="": continue m=re.match(r"(.*?)(\d{6}N\s+.*)",item) if m: name,coords=m.groups() name=name.strip() else: name=item.strip() coords=None if name: lastname=name if coords: subspacelines.setdefault(lastname,[]).append(coords) assert lastname lastname=None #print "Spaces:",subspacelines #print "ICAO",ad['icao'] #altlines=page.get_lines(page.get_partially_in_rect(vertitem.x2+1,vertitem.y1,100,airspaceclass.y1-0.2)) #print "Altlines:",altlines subspacealts=dict() subspacekeys=subspacelines.keys() allaltlines=" ".join(page.get_lines(page.get_partially_in_rect(vertitem.x1+0.5,vertitem.y1+0.5,100,airspaceclass.y1-0.2))) single_vertlim=False totalts=list(mapper.parse_all_alts(allaltlines)) #print 
"totalts:",totalts if len(totalts)==2: single_vertlim=True for subspacename in subspacekeys: ceil=None floor=None subnames=[subspacename] if subspacename.split(" ")[-1].strip() in ["TIA","TIZ","CTR","CTR/TIZ"]: subnames.append(subspacename.split(" ")[-1].strip()) #print "Parsing alts for ",subspacename,subnames try: for nametry in subnames: if single_vertlim: #there's only one subspace, parse all of vertical limits field for this single one. items=[vertitem] else: items=page.get_by_regex_in_rect(nametry,vertitem.x2+1,vertitem.y1,100,airspaceclass.y1-0.2) for item in items: alts=[] for line in page.get_lines(page.get_partially_in_rect(item.x1+0.5,item.y1+0.5,100,airspaceclass.y1-0.2)): #print "Parsing:",line line=line.replace(nametry,"").lower().strip() parsed=list(mapper.parse_all_alts(line)) if len(parsed): alts.append(mapper.altformat(*parsed[0])) if len(alts)==2: break if alts: #print "alts:",alts ceil,floor=alts raise StopIteration except StopIteration: pass assert ceil and floor subspacealts[subspacename]=dict(ceil=ceil,floor=floor) spaces=[] for spacename in subspacelines.keys(): altspacename=spacename #print "Altspacename: %s, subspacesalts: %s"%(altspacename,subspacealts) space=dict( name=spacename, ceil=subspacealts[altspacename]['ceil'], floor=subspacealts[altspacename]['floor'], points=parse_coord_str(" ".join(subspacelines[spacename])), freqs=list(set(freqs)) ) if True: vs=[] for p in space['points']: x,y=mapper.latlon2merc(mapper.from_str(p),13) vs.append(Vertex(int(x),int(y))) p=Polygon(vvector(vs)) if p.calc_area()<=30*30: pass#print space pass#print "Area:",p.calc_area() assert p.calc_area()>30*30 #print "Area: %f"%(p.calc_area(),) spaces.append(space) #print space ad['spaces']=spaces found=True if found: break assert found ad['runways']=rwy_constructor.get_rwys(thrs) #Now find any ATS-airspace chartblobnames=[] for ad in ads: icao=ad['icao'] if icao in big_ad: parse_landing_chart.help_plc(ad,"/AIP/AD/AD 2/%s/ES_AD_2_%s_2-1_en.pdf"%(icao,icao), 
icao,ad['pos'],"se",variant="") parse_landing_chart.help_plc(ad,"/AIP/AD/AD 2/%s/ES_AD_2_%s_6-1_en.pdf"%(icao,icao), icao,ad['pos'],"se",variant="vac") parse_landing_chart.help_plc(ad,"/AIP/AD/AD 2/%s/ES_AD_2_%s_2-3_en.pdf"%(icao,icao), icao,ad['pos'],"se",variant="parking") #aip_text_documents.help_parse_doc(ad,"/AIP/AD/AD 2/%s/ES_AD_2_%s_6_1_en.pdf"%(icao,icao), # icao,"se",title="General Information",category="general") aip_text_documents.help_parse_doc(ad,"/AIP/AD/AD 2/%s/ES_AD_2_%s_en.pdf"%(icao,icao), icao,"se",title="General Information",category="general") #if purge: # parse_landing_chart.purge_old(chartblobnames,country="se") #sys.exit(1) for extra in extra_airfields.extra_airfields: if filtericao(extra): ads.append(extra) print print for k,v in sorted(points.items()): print k,v,mapper.format_lfv(*mapper.from_str(v['pos'])) #print "Num points:",len(points) origads=list(ads) for flygkartan_id,name,lat,lon,dummy in csv.reader(open("fplan/extract/flygkartan.csv"),delimiter=";"): found=None lat=float(lat) lon=float(lon) if type(name)==str: name=unicode(name,'utf8') mercf=mapper.latlon2merc((lat,lon),13) for a in origads: merca=mapper.latlon2merc(mapper.from_str(a['pos']),13) dist=math.sqrt((merca[0]-mercf[0])**2+(merca[1]-mercf[1])**2) if dist<120: found=a break if found: found['flygkartan_id']=flygkartan_id else: d=dict( icao='ZZZZ', name=name, pos=mapper.to_str((lat,lon)), elev=int(get_terrain_elev((lat,lon))), flygkartan_id=flygkartan_id) if filtericao(d): ads.append(d) minor_ad_charts=extra_airfields.minor_ad_charts for ad in ads: if ad['name'].count(u"Långtora"): ad['pos']=mapper.to_str(mapper.from_aviation_format("5944.83N01708.20E")) if ad['name'] in minor_ad_charts: charturl=minor_ad_charts[ad['name']] arp=ad['pos'] if 'icao' in ad and ad['icao'].upper()!='ZZZZ': icao=ad['icao'].upper() else: icao=ad['fake_icao'] parse_landing_chart.help_plc(ad,charturl,icao,arp,country='raw',variant="landing") """ assert icao!=None 
lc=parse_landing_chart.parse_landing_chart( charturl, icao=icao, arppos=arp,country="raw") assert lc if lc: ad['adcharturl']=lc['url'] ad['adchart']=lc """ #print ads for ad in ads: print "%s: %s - %s (%s ft) (%s)"%(ad['icao'],ad['name'],ad['pos'],ad['elev'],ad.get('flygkartan_id','inte i flygkartan')) for space in ad.get('spaces',[]): for freq in space.get('freqs',[]): print " ",freq #if 'spaces' in ad: # print " spaces: %s"%(ad['spaces'],) #if 'aiptext' in ad: # print "Aip texts:",ad['aiptext'] #else: # print "No aiptext" print "Points:" for point in sorted(points.values(),key=lambda x:x['name']): print point f=codecs.open("extract_airfields.regress.txt","w",'utf8') for ad in ads: r=repr(ad) d=md5.md5(r).hexdigest() f.write("%s - %s - %s\n"%(ad['icao'],ad['name'],d)) f.close() f=codecs.open("extract_airfields.regress-details.txt","w",'utf8') for ad in ads: r=repr(ad) f.write(u"%s - %s - %s\n"%(ad['icao'],ad['name'],r)) f.close() return ads,points.values()
def ee_parse_airfields2():
    """Scrape aerodromes and their airspaces from the HTML eAIP (country 'ee').

    The overview page is fetched first to collect the ICAO codes, then the
    AD 2 page of each aerodrome is fetched and parsed. One dict per
    aerodrome is appended to `ads`, and one dict per aerodrome airspace
    (when lateral limits were found) is appended to `spaces`.

    NOTE(review): no return statement is visible in this part of the file;
    confirm against the complete file how `ads` and `spaces` are returned.
    """
    ads = []
    spaces = []
    airac_date = get_airac_date()
    print "airac", airac_date
    overview_url = "/%s/html/eAIP/EE-AD-0.6-en-GB.html" % (airac_date, )

    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(overview_url, country='ee')
    parser.feed(data)
    tree = parser.close()
    # Collect ICAO codes from the links inside the <h3> headings; headings
    # whose text contains "HELIPORT" are skipped.
    icaos = []
    for cand in tree.xpath(".//h3"):
        txts = alltexts(cand.xpath(".//a"))
        aps = re.findall(r"EE[A-Z]{2}", " ".join(txts))
        if aps:
            icao, = aps  # exactly one code per heading is expected
            if alltext(cand).count("HELIPORT"):
                print "Ignore heliport", icao
                continue
            icaos.append(icao)

    for icao in icaos:
        ad = dict(icao=icao)
        url = "/%s/html/eAIP/EE-AD-2.%s-en-GB.html" % (airac_date, icao)
        data, date = fetchdata.getdata(url, country='ee')
        # The same parser object is fed again for every aerodrome page.
        parser.feed(data)
        tree = parser.close()
        thrs = []

        # Aerodrome name: heading of the form "<ICAO> - <name>".
        for h3 in tree.xpath(".//h3"):
            txt = alltext(h3)
            print repr(txt)
            ptrn = ur"\s*%s\s+[—-]\s+(.*)" % (unicode(icao.upper()), )
            m = re.match(ptrn, txt, re.UNICODE)
            if m:
                assert not 'name' in ad
                ad['name'] = m.groups()[0]

        # Reference point position, taken from the "coordinates and site" row.
        for tr in tree.xpath(".//tr"):
            txt = alltext(tr)
            m = re.match(r".*coordinates\s*and\s*site.*(\d{6}N\s*\d{7}E).*",
                         txt)
            #print "Matching,",txt,":",m
            if m:
                crds, = m.groups()
                ad['pos'] = mapper.anyparse(crds)

        # Airspace: lateral limits start a new `space`, vertical limits are
        # added to whichever `space` is current.
        space = dict()
        for table in tree.xpath(".//table"):
            for tr in table.xpath(".//tr"):
                trtxt = alltext(tr)
                if trtxt.count("Designation and lateral limits"):
                    space = dict()
                    coords = tr.getchildren()[2]
                    lines = alltext(coords).split("\n")
                    if lines[0].strip() == 'NIL':
                        continue

                    # First line: "<zone name> CTR|TIZ|FIZ" plus optional
                    # trailing text that belongs to the coordinates.
                    zname, what, spill = re.match(ur"(.*)\s+(CTR|TIZ|FIZ)(.*)",
                                                  lines[0]).groups()
                    if spill and spill.strip():
                        rest = [spill] + lines[1:]
                    else:
                        rest = lines[1:]
                    what = what.strip()
                    assert ad['name'].upper().strip().count(
                        zname.upper().strip())
                    assert what in ['FIZ', 'TIZ', 'CTR']
                    space['type'] = what
                    space['points'] = mapper.parse_coord_str("\n".join(rest))

                    space['name'] = zname + " " + what
                    space['date'] = date
                    space['url'] = fetchdata.getrawurl(url, 'ee')

                if trtxt.count("Vertical limits"):
                    vlim = alltext(tr.getchildren()[2])
                    if vlim.strip() == 'NIL': continue
                    space['floor'], space['ceiling'] = vlim.split(" to ")

                #space['freqs']=x

        #hlc=False
        # Charts: each row of the tables under a "charts" heading is
        # unpacked into a name cell and a cell holding the links.
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.lower().count("charts"):
                par = h4.getparent()
                for table in par.xpath(".//table"):
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        name,page=\
                            tr.getchildren()
                        nametxt = alltext(name)
                        print "nametxt:", nametxt, "link:"
                        for reg, variant in [
                            (r"Aerodrome.*Chart.*", ""),
                            (r"Landing.*Chart.*", "landing"),
                            (r".*Parking.*Chart.*", "parking"),
                            (r".*Visual.*Approach.*|.*\bVAC\b.*", "vac")
                        ]:
                            if re.match(reg, nametxt):
                                for a in page.xpath(".//a"):
                                    print "linklabel", a.text
                                    print "attrib:", a.attrib
                                    href = a.attrib['href']
                                    print "Bef repl", href
                                    if href.lower().endswith("pdf"):
                                        # Rewrite the relative graphics path
                                        # to one rooted at the AIRAC date.
                                        href = href.replace(
                                            "../../graphics",
                                            "/%s/graphics" % (airac_date, ))
                                        print "href:", href, airac_date
                                        assert href
                                        parse_landing_chart.help_plc(
                                            ad,
                                            href,
                                            icao,
                                            ad['pos'],
                                            "ee",
                                            variant=variant)
                                        """arp=ad['pos']
                                        lc=parse_landing_chart.parse_landing_chart(
                                                href,
                                                icao=icao,
                                                arppos=arp,country="ee")
                                        assert lc
                                        if lc:
                                            ad['adcharturl']=lc['url']
                                            ad['adchart']=lc
                                            hlc=True
                                            #chartblobnames.append(lc['blobname'])
                                        """
        #assert hlc
        # Runway thresholds from the tables under "RUNWAY PHYSICAL".
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.count("RUNWAY PHYSICAL"):
                par = h4.getparent()

                for table in par.xpath(".//table"):
                    prevnametxt = ""
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        if idx == 0:
                            fc = alltext(tr.getchildren()[0])
                            print "FC", fc
                            if not fc.count("Designations"):
                                break  #skip table
                        if idx < 2: continue  # first two rows are not data
                        if len(tr.getchildren()) == 1: continue
                        print "c:", tr.getchildren(), alltexts(
                            tr.getchildren())
                        desig, trubrg, dims, strength, thrcoord, threlev = tr.getchildren(
                        )
                        rwy = re.match(r"(\d{2}[LRC]?)", alltext(desig))
                        altc = alltext(thrcoord)
                        print "Matching", altc
                        print "rwymatch:", alltext(desig)
                        m = re.match(r"\s*(\d+\.?\d*N)[\s\n]*(\d+\.?\d*E).*",
                                     altc, re.DOTALL | re.MULTILINE)
                        if m:
                            lat, lon = m.groups()
                            print "Got latlon", lat, lon
                            # NOTE(review): `rwy` is not checked for None
                            # before `.groups()` is called on it.
                            thrs.append(
                                dict(pos=mapper.parse_coords(lat, lon),
                                     thr=rwy.groups()[0]))

        # Frequencies are attached to the current `space` dict, which is the
        # empty dict created above when no lateral limits row was found.
        space['freqs'] = []
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.count("ATS COMMUNICATION"):
                par = h4.getparent()
                for table in par.xpath(".//table"):
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        print "cs", repr(tr.getchildren()), alltexts(
                            tr.getchildren())
                        print len(tr.getchildren())
                        if len(tr.getchildren()) != 5:
                            if "".join(alltexts(
                                    tr.getchildren())).count(u"EMERG"):
                                continue  #Sometimes emergency freq is listed, and then it is without callsign
                        # Any other row that does not have five cells makes
                        # the unpacking below raise.
                        service,callsign,frequency,hours,remarks=\
                            tr.getchildren()
                        callsigntxt = alltext(callsign)
                        if idx < 2:
                            # Header rows: row 0 must read "Call sign" and
                            # row 1 must read "2" in the call sign column.
                            if idx == 0:
                                assert callsigntxt.strip() == "Call sign"
                            if idx == 1:
                                assert callsigntxt.strip() == "2"
                            continue
                        ftext = alltext(frequency)
                        print "matching freq", ftext
                        for freq in re.findall(ur"\b\d{3}\.\d{1,3}", ftext):
                            freqmhz = float(freq)
                            space['freqs'].append(
                                (callsigntxt.strip(), freqmhz))

        # Only spaces that received lateral limits are kept; they must then
        # be complete.
        if space and 'points' in space:
            assert 'freqs' in space
            assert 'points' in space
            assert 'floor' in space
            assert 'ceiling' in space
            assert 'type' in space
            spaces.append(space)
        if thrs:
            ad['runways'] = rwy_constructor.get_rwys(thrs)

        aip_text_documents.help_parse_doc(ad,
                                          url,
                                          icao,
                                          "ee",
                                          title="General Information",
                                          category="general")

        ad['date'] = date
        ad['url'] = fetchdata.getrawurl(url, 'ee')
        print "AD:", ad
        assert 'pos' in ad
        assert 'name' in ad
        ads.append(ad)
ad['pos'], country='fi', variant='landing') parse_landing_chart.help_plc(ad, "/ais/eaip/aipcharts/%s/EF_AD_2_%s_APDC.pdf" % (icao.lower(), icao.upper()), icao, ad['pos'], country='fi', variant='parking') aip_text_documents.help_parse_doc( ad, "/ais/eaip/pdf/aerodromes/EF_AD_2_%s_EN.pdf" % (icao.upper(), ), icao, "fi", title="General Information", category="general") ad['runways'] = [] thrs = [] freqs = [] for pagenr in xrange(p.get_num_pages()): page = p.parse_page_to_items(pagenr) if page == None: continue for item in page.get_by_regex( "\s*RUNWAY\s*PHYSICAL\s*CHARACTERISTICS\s*"): lines = page.get_lines( page.get_partially_in_rect(0, item.y2 + 0.1, 100, 100)) for line in lines:
def ee_parse_airfields2():
    """Scrape aerodromes and their airspaces from the HTML eAIP (country 'ee').

    The overview page is fetched first to collect the ICAO codes, then the
    AD 2 page of each aerodrome is fetched and parsed. One dict per
    aerodrome is appended to `ads`, and one dict per aerodrome airspace
    (when lateral limits were found) is appended to `spaces`.

    NOTE(review): no return statement is visible in this part of the file;
    confirm against the complete file how `ads` and `spaces` are returned.
    """
    ads=[]
    spaces=[]
    airac_date=get_airac_date()
    print "airac",airac_date
    overview_url="/%s/html/eAIP/EE-AD-0.6-en-GB.html"%(airac_date,)

    parser=lxml.html.HTMLParser()
    data,date=fetchdata.getdata(overview_url,country='ee')
    parser.feed(data)
    tree=parser.close()
    # Collect ICAO codes from the links inside the <h3> headings; headings
    # whose text contains "HELIPORT" are skipped.
    icaos=[]
    for cand in tree.xpath(".//h3"):
        txts=alltexts(cand.xpath(".//a"))
        aps=re.findall(r"EE[A-Z]{2}"," ".join(txts))
        if aps:
            icao,=aps  # exactly one code per heading is expected
            if alltext(cand).count("HELIPORT"):
                print "Ignore heliport",icao
                continue
            icaos.append(icao)

    for icao in icaos:
        ad=dict(icao=icao)
        url="/%s/html/eAIP/EE-AD-2.%s-en-GB.html"%(airac_date,icao)
        data,date=fetchdata.getdata(url,country='ee')
        # The same parser object is fed again for every aerodrome page.
        parser.feed(data)
        tree=parser.close()
        thrs=[]

        # Aerodrome name: heading of the form "<ICAO> - <name>".
        for h3 in tree.xpath(".//h3"):
            txt=alltext(h3)
            print repr(txt)
            ptrn=ur"\s*%s\s+[—-]\s+(.*)"%(unicode(icao.upper()),)
            m=re.match(ptrn,txt,re.UNICODE)
            if m:
                assert not 'name' in ad
                ad['name']=m.groups()[0]

        # Reference point position, taken from the "coordinates and site" row.
        for tr in tree.xpath(".//tr"):
            txt=alltext(tr)
            m=re.match(r".*coordinates\s*and\s*site.*(\d{6}N\s*\d{7}E).*",txt)
            #print "Matching,",txt,":",m
            if m:
                crds,=m.groups()
                ad['pos']=mapper.anyparse(crds)

        # Airspace: lateral limits start a new `space`, vertical limits are
        # added to whichever `space` is current.
        space=dict()
        for table in tree.xpath(".//table"):
            for tr in table.xpath(".//tr"):
                trtxt=alltext(tr)
                if trtxt.count("Designation and lateral limits"):
                    space=dict()
                    coords=tr.getchildren()[2]
                    lines=alltext(coords).split("\n")
                    if lines[0].strip()=='NIL':
                        continue

                    # First line: "<zone name> CTR|TIZ|FIZ" plus optional
                    # trailing text that belongs to the coordinates.
                    zname,what,spill=re.match(ur"(.*)\s+(CTR|TIZ|FIZ)(.*)",lines[0]).groups()
                    if spill and spill.strip():
                        rest=[spill]+lines[1:]
                    else:
                        rest=lines[1:]
                    what=what.strip()
                    assert ad['name'].upper().strip().count(zname.upper().strip())
                    assert what in ['FIZ','TIZ','CTR']
                    space['type']=what
                    space['points']=mapper.parse_coord_str("\n".join(rest))

                    space['name']=zname+" "+what
                    space['date']=date
                    space['url']=fetchdata.getrawurl(url,'ee')

                if trtxt.count("Vertical limits"):
                    vlim=alltext(tr.getchildren()[2])
                    if vlim.strip()=='NIL': continue
                    space['floor'],space['ceiling']=vlim.split(" to ")

                #space['freqs']=x

        #hlc=False
        # Charts: each row of the tables under a "charts" heading is
        # unpacked into a name cell and a cell holding the links.
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.lower().count("charts"):
                par=h4.getparent()
                for table in par.xpath(".//table"):
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        name,page=\
                            tr.getchildren()
                        nametxt=alltext(name)
                        print "nametxt:",nametxt,"link:"
                        for reg,variant in [
                                           (r"Aerodrome.*Chart.*","") ,
                                           (r"Landing.*Chart.*","landing"),
                                           (r".*Parking.*Chart.*","parking"),
                                           (r".*Visual.*Approach.*|.*\bVAC\b.*","vac")
                                            ]:
                            if re.match(reg,nametxt):
                                for a in page.xpath(".//a"):
                                    print "linklabel",a.text
                                    print "attrib:",a.attrib
                                    href=a.attrib['href']
                                    print "Bef repl",href
                                    if href.lower().endswith("pdf"):
                                        # Rewrite the relative graphics path
                                        # to one rooted at the AIRAC date.
                                        href=href.replace("../../graphics","/%s/graphics"%(airac_date,))
                                        print "href:",href,airac_date
                                        assert href
                                        parse_landing_chart.help_plc(ad,href,
                                                        icao,ad['pos'],"ee",variant=variant)
                                        """arp=ad['pos']
                                        lc=parse_landing_chart.parse_landing_chart(
                                                href,
                                                icao=icao,
                                                arppos=arp,country="ee")
                                        assert lc
                                        if lc:
                                            ad['adcharturl']=lc['url']
                                            ad['adchart']=lc
                                            hlc=True
                                            #chartblobnames.append(lc['blobname'])
                                        """
        #assert hlc
        # Runway thresholds from the tables under "RUNWAY PHYSICAL".
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.count("RUNWAY PHYSICAL"):
                par=h4.getparent()

                for table in par.xpath(".//table"):
                    prevnametxt=""
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        if idx==0:
                            fc=alltext(tr.getchildren()[0])
                            print "FC",fc
                            if not fc.count("Designations"):
                                break #skip table
                        if idx<2:continue  # first two rows are not data
                        if len(tr.getchildren())==1:continue
                        print "c:",tr.getchildren(),alltexts(tr.getchildren())
                        desig,trubrg,dims,strength,thrcoord,threlev=tr.getchildren()
                        rwy=re.match(r"(\d{2}[LRC]?)",alltext(desig))
                        altc=alltext(thrcoord)
                        print "Matching",altc
                        print "rwymatch:",alltext(desig)
                        m=re.match(r"\s*(\d+\.?\d*N)[\s\n]*(\d+\.?\d*E).*",altc,re.DOTALL|re.MULTILINE)
                        if m:
                            lat,lon=m.groups()
                            print "Got latlon",lat,lon
                            # NOTE(review): `rwy` is not checked for None
                            # before `.groups()` is called on it.
                            thrs.append(dict(pos=mapper.parse_coords(lat,lon),thr=rwy.groups()[0]))

        # Frequencies are attached to the current `space` dict, which is the
        # empty dict created above when no lateral limits row was found.
        space['freqs']=[]
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.count("ATS COMMUNICATION"):
                par=h4.getparent()
                for table in par.xpath(".//table"):
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        print "cs",repr(tr.getchildren()),alltexts(tr.getchildren())
                        print len(tr.getchildren())
                        if len(tr.getchildren())!=5:
                            if "".join(alltexts(tr.getchildren())).count(u"EMERG"):
                                continue #Sometimes emergency freq is listed, and then it is without callsign
                        # Any other row that does not have five cells makes
                        # the unpacking below raise.
                        service,callsign,frequency,hours,remarks=\
                            tr.getchildren()
                        callsigntxt=alltext(callsign)
                        if idx<2:
                            # Header rows: row 0 must read "Call sign" and
                            # row 1 must read "2" in the call sign column.
                            if idx==0:
                                assert callsigntxt.strip()=="Call sign"
                            if idx==1:
                                assert callsigntxt.strip()=="2"
                            continue
                        ftext=alltext(frequency)
                        print "matching freq",ftext
                        for freq in re.findall(ur"\b\d{3}\.\d{1,3}",ftext):
                            freqmhz=float(freq)
                            space['freqs'].append((callsigntxt.strip(),freqmhz))

        # Only spaces that received lateral limits are kept; they must then
        # be complete.
        if space and 'points' in space:
            assert 'freqs' in space
            assert 'points' in space
            assert 'floor' in space
            assert 'ceiling' in space
            assert 'type' in space
            spaces.append(space)
        if thrs:
            ad['runways']=rwy_constructor.get_rwys(thrs)

        aip_text_documents.help_parse_doc(ad,url,
                        icao,"ee",title="General Information",category="general")

        ad['date']=date
        ad['url']=fetchdata.getrawurl(url,'ee')
        print "AD:",ad
        assert 'pos' in ad
        assert 'name' in ad
        ads.append(ad)
) parse_landing_chart.help_plc( ad, "/AIP/AD/AD 2/%s/ES_AD_2_%s_6-1_en.pdf" % (icao, icao), icao, ad["pos"], "se", variant="vac" ) parse_landing_chart.help_plc( ad, "/AIP/AD/AD 2/%s/ES_AD_2_%s_2-3_en.pdf" % (icao, icao), icao, ad["pos"], "se", variant="parking" ) # aip_text_documents.help_parse_doc(ad,"/AIP/AD/AD 2/%s/ES_AD_2_%s_6_1_en.pdf"%(icao,icao), # icao,"se",title="General Information",category="general") aip_text_documents.help_parse_doc( ad, "/AIP/AD/AD 2/%s/ES_AD_2_%s_en.pdf" % (icao, icao), icao, "se", title="General Information", category="general", ) # if purge: # parse_landing_chart.purge_old(chartblobnames,country="se") # sys.exit(1) for extra in extra_airfields.extra_airfields: if filtericao(extra): ads.append(extra) print print for k, v in sorted(points.items()):