def parse_sig_points():
    """Parse the Swedish (ES) AIP ENR 4.4 and ENR 4.1 PDFs.

    Returns a list of dicts.  Significant points carry keys
    name/kind('sig. point')/pos; nav-aids additionally carry
    short (2-3 letter designator) and freq (e.g. "112.500 MHz").
    """
    #ENR 4.4: significant (reporting) points, one per table row.
    p=Parser("/AIP/ENR/ENR 4/ES_ENR_4_4_en.pdf")
    points=[]
    for pagenr in xrange(p.get_num_pages()):
        page=p.parse_page_to_items(pagenr)
        lines=page.get_lines(page.get_all_items(),order_fudge=20)
        for line in lines:
            cols=line.split()
            if len(cols)>2:
                #Column 0 is the point name, columns 1-2 the lat/lon pair.
                coordstr=" ".join(cols[1:3])
                if len(mapper.parsecoords(coordstr))>0:
                    crd=mapper.parsecoord(coordstr)
                    points.append(dict(
                        name=cols[0],
                        kind='sig. point',
                        pos=crd))
    #ENR 4.1: radio navigation aids (VOR/DME/NDB), parsed by column headings.
    p=Parser("/AIP/ENR/ENR 4/ES_ENR_4_1_en.pdf")
    for pagenr in xrange(p.get_num_pages()):
        page=p.parse_page_to_items(pagenr)
        nameheading,=page.get_by_regex(r".*Name of station.*")
        freqheading,=page.get_by_regex(r".*Frequency.*")
        coordheading,=page.get_by_regex(r".*Coordinates.*")
        #Items in the name column, top to bottom; blanks filtered out already.
        items=sorted(list(x for x in page.get_partially_in_rect(
            nameheading.x1,nameheading.y2+2,nameheading.x1+1,100)
            if x.text.strip()),key=lambda x:x.y1)
        idx=0
        while True:
            if items[idx].text.strip()=="":
                idx+=1
                continue
            if idx+1>=len(items):
                break
            #Station rows come in (name, kind) pairs, vertically adjacent.
            name=items[idx]
            kind=items[idx+1]
            diffy=kind.y1-name.y2
            assert kind.text.count("VOR") or kind.text.count("DME") or kind.text.count("NDB")
            assert diffy<0.5
            freqraw=" ".join(page.get_lines(page.get_partially_in_rect(
                freqheading.x1,name.y1+0.05,freqheading.x2,kind.y2-0.05)))
            #BUGFIX: the frequency pattern was (\d+(?:\.?\d+)), which forces a
            #second digit group and so fails on single-digit frequencies;
            #(?:\.\d+)? makes the fractional part genuinely optional while
            #still matching everything the old pattern matched.
            short,freq=re.match(r"\s*([A-Z]{2,3})?\s*(\d+(?:\.\d+)?\s+(?:MHz|kHz))\s*(?:H24)?\s*",freqraw).groups()
            posraw=" ".join(page.get_lines(page.get_partially_in_rect(
                coordheading.x1,name.y1+0.05,coordheading.x2,kind.y2-0.05)))
            pos=mapper.parse_coords(*re.match(
                r".*?(\d+\.\d+[NS]).*?(\d+\.\d+[EW]).*",posraw).groups())
            points.append(dict(
                name=short+" "+kind.text.strip()+" "+name.text.strip(),
                short=short,
                kind="nav-aid",
                pos=pos,
                freq=freq))
            idx+=2
    return points
def ey_parse_airfield(icao):
    """Scrape the Lithuanian (EY) AD-2 PDF for one airfield.

    Parses the airfield name, ARP position, elevation (ft) and the
    COM frequencies from the ATS COMMUNICATION FACILITIES section.
    NOTE(review): the visible code never returns or stores name/pos/
    elev/freqs - presumably the function continues elsewhere; confirm.
    """
    spaces = []
    p = Parser("/EY_AD_2_%s_en.pdf" % (icao, ), lambda x: x)
    freqs = []
    for nr in xrange(0, p.get_num_pages()):
        page = p.parse_page_to_items(nr)
        if nr == 0:
            #Page 0 carries the "<ICAO> - <name>" heading (en-dash or hyphen).
            #[–-]
            nameregex = ur"\s*%s\s*[–-]\s*(.*?)\s*$" % (icao, )
            print "Nameregex", nameregex
            nameitem = page.get_by_regex(nameregex, re.UNICODE)[0]
            name, = re.match(nameregex, nameitem.text, re.UNICODE).groups()
            name = name.replace("Tarptautinis", "International")
            #print repr(name)
            #sys.exit(1)
            #"ARP koordinates" heading (Lithuanian); the coordinate text sits
            #just to the right of it.
            coordhdg, = page.get_by_regex(ur".*ARP\s*koordinat.s.*", re.DOTALL)
            coord = page.get_partially_in_rect(coordhdg.x2 + 4, coordhdg.y1 + 0.1, 100, coordhdg.y2 - 0.1)[0]
            pos, = mapper.parsecoords(fixup(coord.text.replace(" ", "")))
            #"Vietos aukštis" = site elevation; expects "<digits> FT".
            elevhdg, = page.get_by_regex(ur".*Vietos\s*aukštis.*", re.DOTALL)
            elevitem, = page.get_partially_in_rect(elevhdg.x2 + 1, elevhdg.y1 + 0.1, 100, elevhdg.y2 - 0.1)
            elev, = re.match(ur"(\d+)\s*FT.*", elevitem.text).groups()
            elev = int(elev)
        #Frequencies lie between the COMMUNICATION heading and the
        #RADIO NAVIGATION heading (or the page bottom).
        #NOTE(review): loop nesting reconstructed from mangled source -
        #this may belong inside the nr==0 branch; confirm against upstream.
        for comm in page.get_by_regex(ur".*ATS\s*COMMUNICATION\s*FACILITIES.*", re.DOTALL):
            ends = page.get_by_regex_in_rect(ur".*RADIO\s*NAVIGATION.*", 0, comm.y2, 100, 100)
            if ends:
                end = ends[0].y1 - 0.1
            else:
                end = 100
            freqitems = page.get_by_regex_in_rect(ur".*\d{3}\.\d{3}.*", 0, comm.y2, 100, end - 0.1)
            #The service name column (x<17) is only present on the first row
            #of each service; carry it forward for continuation rows.
            lastservice = None
            for freq in freqitems:
                service = page.get_partially_in_rect(0, freq.y1 + 0.1, 17, freq.y2 - 0.1)
                if service:
                    lastservice = service[0]
                print lastservice
                assert len(spaces) == 0
                #Skip the emergency frequencies 121.500/243.000.
                for freqstr in re.findall(ur"\d{3}\.\d{3}", freq.text):
                    if freqstr != "121.500" and freqstr != "243.000":
                        freqs.append((lastservice.text.split("/")[0], float(freqstr)))
def ey_parse_airfield(icao): spaces=[] p=Parser("/EY_AD_2_%s_en.pdf"%(icao,),lambda x:x) freqs=[] for nr in xrange(0,p.get_num_pages()): page=p.parse_page_to_items(nr) if nr==0: #[–-] nameregex=ur"\s*%s\s*[–-]\s*(.*?)\s*$"%(icao,) print "Nameregex",nameregex nameitem=page.get_by_regex(nameregex,re.UNICODE)[0] name,=re.match(nameregex,nameitem.text,re.UNICODE).groups() name=name.replace("Tarptautinis","International") #print repr(name) #sys.exit(1) coordhdg,=page.get_by_regex(ur".*ARP\s*koordinat.s.*",re.DOTALL) coord=page.get_partially_in_rect( coordhdg.x2+4,coordhdg.y1+0.1,100,coordhdg.y2-0.1)[0] pos,=mapper.parsecoords(fixup(coord.text.replace(" ",""))) elevhdg,=page.get_by_regex(ur".*Vietos\s*aukštis.*",re.DOTALL) elevitem,=page.get_partially_in_rect( elevhdg.x2+1,elevhdg.y1+0.1,100,elevhdg.y2-0.1) elev,=re.match(ur"(\d+)\s*FT.*",elevitem.text).groups() elev=int(elev) for comm in page.get_by_regex(ur".*ATS\s*COMMUNICATION\s*FACILITIES.*",re.DOTALL): ends=page.get_by_regex_in_rect( ur".*RADIO\s*NAVIGATION.*", 0,comm.y2,100,100) if ends: end=ends[0].y1-0.1 else: end=100 freqitems=page.get_by_regex_in_rect( ur".*\d{3}\.\d{3}.*", 0,comm.y2,100,end-0.1) lastservice=None for freq in freqitems: service=page.get_partially_in_rect( 0,freq.y1+0.1,17,freq.y2-0.1) if service: lastservice=service[0] print lastservice assert len(spaces)==0 for freqstr in re.findall(ur"\d{3}\.\d{3}",freq.text): if freqstr!="121.500" and freqstr!="243.000": freqs.append((lastservice.text.split("/")[0],float(freqstr)))
def fi_parse_airfield(icao=None):
    """Scrape the Finnish (EF) AD-2 PDF + charts for one airfield.

    Fills ad (icao, name, elev, pos, runways), collects runway thresholds
    into thrs, CTR/FIZ airspaces into spaces and COM frequencies into
    freqs, and registers the landing/parking charts and the general-info
    document for the airfield.
    NOTE(review): the visible code never returns ad/thrs/spaces/freqs -
    presumably the function continues past this chunk; confirm.
    NOTE(review): formatting reconstructed from whitespace-mangled source;
    loop nesting is the most plausible reading, not byte-certain.
    """
    spaces=[]
    ad=dict()
    assert icao!=None
    ad['icao']=icao
    sigpoints=[]
    #https://ais.fi/ais/eaip/pdf/aerodromes/EF_AD_2_EFET_EN.pdf
    #https://ais.fi/ais/eaip/aipcharts/efet/EF_AD_2_EFET_VAC.pdf
    #vacp=parse.Parser("/ais/eaip/aipcharts/%s/EF_AD_2_%s_VAC.pdf"%(icao.lower(),icao),lambda x: x,country="fi")
    def remove_italics(x):
        #Strip <i>/</i> markup emitted by the PDF text extractor.
        return x.replace("<i>","").replace("</i>","")
    p=parse.Parser("/ais/eaip/pdf/aerodromes/EF_AD_2_%s_EN.pdf"%(icao,),remove_italics,country="fi")
    #The following doesn't actually work, since finnish VAC are bitmaps!!! :-(
    #NOTE(review): this disabled block references vacp (commented out above)
    #and unpacks re.match without .groups() - latent bugs if ever re-enabled.
    if 0:
        vacpage=vacp.parse_page_to_items(0)
        repp=vacpage.get_by_regex("\s*REPORTING\s*POINTS\s*")
        assert len(repp)>0
        for item in repp:
            lines=iter(page.get_lines(page.get_partially_in_rect(item.x1,item.y2+0.1,100,100)))
            for line in lines:
                uprint("Looking for reporting points:%s"%(line,))
                name,lat,lon=re.match(ur"([A-ZÅÄÖ\s ]{3,})\s*([ \d]+N)\s*([ \d]+E).*",line)
                sigpoints.append(dict(
                    name=icao+" "+name.strip(),
                    kind="reporting",
                    pos=mapper.parse_coords(lat.replace(" ",""),lon.replace(" ",""))))
    page=p.parse_page_to_items(0)
    #Airfield name from the "<ICAO> - <NAME>" heading; large font only.
    nameregex=ur"%s\s+-\s+([A-ZÅÄÖ\- ]{3,})"%(icao,)
    for item in page.get_by_regex(nameregex):
        #print "fontsize:",item.fontsize
        assert item.fontsize>=14
        ad['name']=re.match(nameregex,item.text).groups()[0].strip()
        break
    #Elevation from the "ELEV / REF" row, in feet.
    for item in page.get_by_regex(ur".*ELEV\s*/\s*REF.*"):
        lines=page.get_lines(page.get_partially_in_rect(0,item.y1+0.1,100,item.y2-0.1))
        for line in lines:
            print "Line:",line
            ft,=re.match(".*ELEV.*([\d\.]+)\s*FT.*",line).groups()
            assert not 'elev' in ad
            ad['elev']=float(ft)
    #ARP position from the "Mittapisteen sijainti" (measuring-point
    #location, Finnish) row.
    for item in page.get_by_regex(ur"Mittapisteen.*sijainti"):
        lines=page.get_lines(page.get_partially_in_rect(item.x1,item.y1,100,item.y2))
        for line in lines:
            for crd in mapper.parsecoords(line):
                assert not ('pos' in ad)
                ad['pos']=crd
    #Register the aerodrome/visual/landing/parking charts for this field.
    parse_landing_chart.help_plc(ad,
        "/ais/eaip/aipcharts/%s/EF_AD_2_%s_ADC.pdf"%(icao.lower(),icao.upper()),
        icao,ad['pos'],country='fi'
        )
    parse_landing_chart.help_plc(ad,
        "/ais/eaip/aipcharts/%s/EF_AD_2_%s_VAC.pdf"%(icao.lower(),icao.upper()),
        icao,ad['pos'],country='fi',variant='VAC'
        )
    parse_landing_chart.help_plc(ad,
        "/ais/eaip/aipcharts/%s/EF_AD_2_%s_LDG.pdf"%(icao.lower(),icao.upper()),
        icao,ad['pos'],country='fi',variant='landing'
        )
    parse_landing_chart.help_plc(ad,
        "/ais/eaip/aipcharts/%s/EF_AD_2_%s_APDC.pdf"%(icao.lower(),icao.upper()),
        icao,ad['pos'],country='fi',variant='parking'
        )
    aip_text_documents.help_parse_doc(ad,"/ais/eaip/pdf/aerodromes/EF_AD_2_%s_EN.pdf"%(icao.upper(),),
        icao,"fi",title="General Information",category="general")
    ad['runways']=[]
    thrs=[]
    freqs=[]
    for pagenr in xrange(p.get_num_pages()):
        page=p.parse_page_to_items(pagenr)
        if page==None:
            continue
        #Runway thresholds: scan the physical-characteristics table for
        #lat/lon pairs; stop at section AD 2.13.
        for item in page.get_by_regex("\s*RUNWAY\s*PHYSICAL\s*CHARACTERISTICS\s*"):
            lines=page.get_lines(page.get_partially_in_rect(0,item.y2+0.1,100,100))
            for line in lines:
                if re.match(ur"AD\s+2.13",line):
                    break
                m=re.match(ur".*?(RWY END)?\s*\*?(\d{6}\.\d+N)\s*(\d{6,7}\.\d+E).*",line)
                if not m:
                    continue
                rwyend,lat,lon=m.groups()
                #Runway designator sits in the leftmost column (x<12).
                rwytxts=page.get_lines(page.get_partially_in_rect(0,line.y1,12,line.y2))
                print "Rwytxts:",rwytxts
                rwytxt,=rwytxts
                uprint("rwytext:",rwytxt)
                rwy,=re.match(ur"\s*(\d{2}[LRCM]?)\s*[\d.]*\s*",rwytxt).groups()
                have_thr=False
                for thr in thrs:
                    if thr['thr']==rwy:
                        have_thr=True
                #Keep only the threshold coordinate, not the RWY END one,
                #once a threshold for this runway is already recorded.
                if rwyend!=None and have_thr:
                    continue
                thrs.append(dict(pos=mapper.parse_coords(lat,lon),thr=rwy))
        #CTR/FIZ airspaces: lateral limits first, then vertical limits.
        for item in page.get_by_regex("ATS AIRSPACE"):
            lines=iter(page.get_lines(page.get_partially_in_rect(0,item.y2+0.1,100,100)))
            spaces=[]
            line=lines.next()
            while True:
                while line.strip()=="":
                    line=lines.next()
                print "Read line:",line
                if line.count("Vertical limits"):
                    break
                m=re.match(ur".*?/\s+Designation and lateral limits\s*(.*\b(?:CTR|FIZ)\b.*?)\s*:?\s*$",line)
                if not m:
                    m=re.match(ur"\s*(.*\b(?:CTR|FIZ)\b.*?)\s*:",line)
                    #print "Second try:",m
                spacename,=m.groups()
                #print "Got spacename:",spacename
                assert spacename.strip()!=""
                #Accumulate the coordinate lines of this space's boundary.
                coords=[]
                while True:
                    line=lines.next()
                    print "Further:",line
                    if line.count("Vertical limits"):
                        print "Breaking"
                        break
                    if not re.search(ur"[\d ]+N\s*[\d ]+E",line) and \
                        not re.search(ur"circle|cent[red]{1,5}|pitkin|point|equal\s*to",line):
                        print "Breaking"
                        break
                    coords.append(line)
                areaspec="".join(coords)
                def fixup(m):
                    #Collapse spaces inside each lat/lon token.
                    lat,lon=m.groups()
                    return lat.replace(" ","")+" "+lon.replace(" ","")
                areaspec=re.sub(ur"([\d ]+N)\s*([\d ]+E)",fixup,areaspec)
                areaspec=re.sub(ur"\(.*/\s*equal\s*to\s*Malmi\s*CTR\s*lateral\s*limits\)","",areaspec)
                #print "Fixed areaspec",areaspec
                #if icao=="EFKS":
                #    areaspec=areaspec.replace("6615 28N","661528N")
                #Error! REstriction areas!
                spaces.append(dict(
                    name=spacename,
                    type="CTR",
                    points=mapper.parse_coord_str(areaspec)))
                if line.count("Vertical limits"):
                    #print "Breaking"
                    break
            while not line.count("Vertical limits"):
                line=lines.next()
            #print "Matching veritcal limits--------------------------------"
            #Spaces named "A / B" are split into two entries sharing geometry.
            oldspaces=spaces
            spaces=[]
            for space in oldspaces:
                if space['name'].count("/"):
                    a,b=space['name'].split("/")
                    spaces.append(dict(space,name=a.strip()))
                    spaces.append(dict(space,name=b.strip()))
                else:
                    spaces.append(space)
            missing=set([space['name'] for space in spaces])
            #Read lines until every space has floor/ceiling assigned, trying
            #progressively fuzzier candidate names (full name, CTR/FIZ,
            #then regex variants) on each line.
            while True:
                for space in spaces:
                    for it in xrange(3):
                        cand=space['name']
                        if it==1:
                            if cand.count("CTR"): cand="CTR"
                            if cand.count("FIZ"): cand="FIZ"
                        if it==2:
                            if cand.count("CTR"): cand=r"CTR\s*/[\sA-Z]+"
                            if cand.count("FIZ UPPER"): cand="FIZ UPPER"
                            if cand.count("FIZ LOWER"): cand="FIZ LOWER"
                        m=re.match(ur".*%s\s*:([^,:-]*)\s*-\s*([^,:-]*)"%(cand,),line)
                        print "Matching ",cand," to ",line,"missing:",missing,m
                        if m:
                            break
                    if len(spaces)==1 and not m:
                        m=re.match(ur".*Vertical limits\s*(.*)\s*-\s*(.*)",line)
                        if m:
                            print "*****MATCH!!:::",m.groups()
                    if m:
                        for lim in m.groups():
                            assert lim.count(",")==0
                        space['floor'],space['ceiling']=m.groups()
                        missing.remove(space['name'])
                        #print "Missing:"
                        if len(missing)==0:
                            break
                if len(missing)==0:
                    break
                #print "Still missing:",missing
                line=lines.next()
        print "Parse f o n page",pagenr
        #COM frequencies: TWR and ATIS rows up to the radio-nav section.
        for item2 in page.get_by_regex(ur".*ATS\s*COMMUNICATION\s*FACILITIES.*"):
            lines=page.get_lines(page.get_partially_in_rect(0,item2.y2+0.1,100,100))
            for line in lines:
                if line.count("RADIO NAVIGATION AND LANDING AIDS"):
                    break
                print "Comm line:",line
                twr=re.match(ur"TWR.*(\d{3}\.\d{3})\b.*",line)
                if twr:
                    freqs.append(('TWR',float(twr.groups()[0])))
                atis=re.match(ur"ATIS.*(\d{3}\.\d{3})",line)
                if atis:
                    freqs.append(('ATIS',float(atis.groups()[0])))
def ev_parse_airfields(): ads=[] spaces=[] seen=set() cur_airac=get_cur_airac() assert cur_airac for icao in ["EVRA", "EVLA", "EVTJ", "EVVA"]: thrs=[] url="/eAIPfiles/%s-AIRAC/html/eAIP/EV-AD-2.%s-en-GB.html"%(cur_airac,icao) data,date=fetchdata.getdata(url,country='ev') parser=lxml.html.HTMLParser() parser.feed(data) tree=parser.close() elev=None pos=None ctrarea=None ctr=None ctralt=None ctrname=None adcharturl=None adchart=None adnametag,=tree.xpath("//p[@class='ADName']") adnamestr=alltext(adnametag) print adnamestr name,=re.match(ur"%s\s*[-—]\s*([\w\s]+)"%(icao,),adnamestr,re.UNICODE).groups() freqs=[] for table in tree.xpath("//table"): rows=list(table.xpath(".//tr")) headings=list(table.xpath(".//th")) if len(headings)==5: if headings[2]=="Frequency": for row in rows: cols=alltexts(table.xpath(".//td")) desig,name=cols[0:2] freq,=re.match(ur"\d{3}\.\d{3}\s*MHz",cols[2]).groups() if freq!="121.500": freqs.append((desig+" "+name,float(freq))) continue for row in rows: cols=alltexts(row.xpath(".//td")) print "cols:",repr(cols) if len(cols)<2: continue if not pos and re.match(ur".*ARP\s*coordinates.*",cols[1]): pos,=mapper.parsecoords(cols[2]) if not elev and re.match(ur"Elevation.*",cols[1]): elev,=re.match(ur"(\d+) FT.*",cols[2]).groups() if not ctr and re.match(ur"Designation\s*and\s*lateral\s*limits",cols[1]): lines=cols[2].split("\n") ctr=True print "Got lateral limits",lines[0] try: ctrname,type_=re.match(ur"^([\w\s]+)(CTR|TIZ)",lines[0]).groups() ctrarea=" ".join(lines[1:]) except: ctrname,type_=re.match(ur"^([\w\s]+)(CTR|TIZ)",lines[0]+lines[1]).groups() ctrarea=" ".join(lines[2:]) assert ctrname.strip() ctrname=ctrname.strip()+" "+type_ #print ".",cols[1],"." if not ctralt and re.match(ur".*Vertical\s*limits.*",cols[1],re.UNICODE): ctralt=True #print "<",cols[2],">" alts=cols[2].split("/") if len(alts)==1: ceiling=alts[0] floor="GND" else: ceiling,floor=alts print "Parsed",ceiling,floor
def parse_sig_points():
    """Parse the Swedish (ES) AIP ENR 4.4 and ENR 4.1 PDFs.

    Returns a list of dicts.  Significant points carry keys
    name/kind('sig. point')/pos; nav-aids additionally carry
    short (2-3 letter designator) and freq (e.g. "112.500 MHz").
    """
    # ENR 4.4: significant (reporting) points, one per table row.
    p = Parser("/AIP/ENR/ENR 4/ES_ENR_4_4_en.pdf")
    points = []
    for pagenr in xrange(p.get_num_pages()):
        page = p.parse_page_to_items(pagenr)
        lines = page.get_lines(page.get_all_items(), order_fudge=20)
        for line in lines:
            cols = line.split()
            if len(cols) > 2:
                # Column 0 is the point name, columns 1-2 the lat/lon pair.
                coordstr = " ".join(cols[1:3])
                if len(mapper.parsecoords(coordstr)) > 0:
                    crd = mapper.parsecoord(coordstr)
                    points.append(
                        dict(name=cols[0], kind='sig. point', pos=crd))
    # ENR 4.1: radio navigation aids (VOR/DME/NDB), keyed off the
    # column headings.
    p = Parser("/AIP/ENR/ENR 4/ES_ENR_4_1_en.pdf")
    for pagenr in xrange(p.get_num_pages()):
        page = p.parse_page_to_items(pagenr)
        nameheading, = page.get_by_regex(r".*Name of station.*")
        freqheading, = page.get_by_regex(r".*Frequency.*")
        coordheading, = page.get_by_regex(r".*Coordinates.*")
        items = sorted(list(x for x in page.get_partially_in_rect(
            nameheading.x1, nameheading.y2 + 2, nameheading.x1 + 1, 100)
                            if x.text.strip()),
                       key=lambda x: x.y1)
        idx = 0
        while True:
            if items[idx].text.strip() == "":
                idx += 1
                continue
            if idx + 1 >= len(items):
                break
            # Station rows come in (name, kind) pairs, vertically adjacent.
            name = items[idx]
            kind = items[idx + 1]
            diffy = kind.y1 - name.y2
            assert kind.text.count("VOR") or kind.text.count(
                "DME") or kind.text.count("NDB")
            assert diffy < 0.5
            freqraw = " ".join(
                page.get_lines(
                    page.get_partially_in_rect(freqheading.x1, name.y1 + 0.05,
                                               freqheading.x2,
                                               kind.y2 - 0.05)))
            # BUGFIX: the frequency pattern was (\d+(?:\.?\d+)), which forces
            # a second digit group and so fails on single-digit frequencies;
            # (?:\.\d+)? makes the fractional part genuinely optional while
            # still matching everything the old pattern matched.
            short, freq = re.match(
                r"\s*([A-Z]{2,3})?\s*(\d+(?:\.\d+)?\s+(?:MHz|kHz))\s*(?:H24)?\s*",
                freqraw).groups()
            posraw = " ".join(
                page.get_lines(
                    page.get_partially_in_rect(coordheading.x1,
                                               name.y1 + 0.05,
                                               coordheading.x2,
                                               kind.y2 - 0.05)))
            pos = mapper.parse_coords(*re.match(
                r".*?(\d+\.\d+[NS]).*?(\d+\.\d+[EW]).*", posraw).groups())
            points.append(
                dict(name=short + " " + kind.text.strip() + " " +
                     name.text.strip(),
                     short=short,
                     kind="nav-aid",
                     pos=pos,
                     freq=freq))
            idx += 2
    return points
# NOTE(review): orphaned fragment - this duplicates the middle of
# fi_parse_airfield (name/ELEV/ARP parsing and the landing-chart
# registration calls) in reformatted style, but its enclosing "def" line is
# missing and it is truncated mid-call (after country='fi',).  Left
# byte-identical; the enclosing function should be restored or this
# duplicate removed.
ad['name'] = re.match(nameregex, item.text).groups()[0].strip() break for item in page.get_by_regex(ur".*ELEV\s*/\s*REF.*"): lines = page.get_lines( page.get_partially_in_rect(0, item.y1 + 0.1, 100, item.y2 - 0.1)) for line in lines: print "Line:", line ft, = re.match(".*ELEV.*([\d\.]+)\s*FT.*", line).groups() assert not 'elev' in ad ad['elev'] = float(ft) for item in page.get_by_regex(ur"Mittapisteen.*sijainti"): lines = page.get_lines( page.get_partially_in_rect(item.x1, item.y1, 100, item.y2)) for line in lines: for crd in mapper.parsecoords(line): assert not ('pos' in ad) ad['pos'] = crd parse_landing_chart.help_plc(ad, "/ais/eaip/aipcharts/%s/EF_AD_2_%s_ADC.pdf" % (icao.lower(), icao.upper()), icao, ad['pos'], country='fi') parse_landing_chart.help_plc(ad, "/ais/eaip/aipcharts/%s/EF_AD_2_%s_VAC.pdf" % (icao.lower(), icao.upper()), icao, ad['pos'], country='fi',
def ee_parse_airfield(icao=None):
    """Scrape the Estonian (EE) AD-2 PDF for one airfield.

    Fills ad (icao, name, elev, pos, runways) and collects runway
    thresholds into thrs and COM frequencies into freqs.  The ATS
    AIRSPACE parsing is disabled (if 0:).
    NOTE(review): the visible code never returns ad/thrs/freqs -
    presumably the function continues past this chunk; confirm.
    NOTE(review): formatting reconstructed from whitespace-mangled source;
    loop nesting is the most plausible reading, not byte-certain.
    """
    spaces = []
    ad = dict()
    ad["icao"] = icao
    sigpoints = []
    p = parse.Parser("/ee_%s.pdf" % (icao,), lambda x: x, country="ee")
    page = p.parse_page_to_items(0)
    print icao
    # Airfield name from the "<ICAO> - <NAME>" heading (hyphen or minus
    # sign); large font only.
    nameregex = ur".*%s\s*[-−]\s*([A-ZÅÄÖ\- ]{3,})" % (icao,)
    for item in page.get_by_regex(nameregex):
        print "fontsize:", item.fontsize
        assert item.fontsize >= 10
        ad["name"] = re.match(nameregex, item.text).groups()[0].strip()
        break
    else:
        raise Exception("Found no airfield name!")
    # Elevation from the "Kõrgus merepinnast" (height above sea level,
    # Estonian) row, in feet.
    for item in page.get_by_regex(ur".*Kõrgus merepinnast.*"):
        lines = page.get_lines(page.get_partially_in_rect(0, item.y1 + 0.1, 100, item.y2 - 0.1))
        for line in lines:
            ft, = re.match(".*?([\d\.]+)\s*FT\.*", line).groups()
            assert not "elev" in ad
            print "parsed ft:", ft
            ad["elev"] = float(ft)
    # ARP position from the "ARP koordinaadid" (ARP coordinates) row.
    for item in page.get_by_regex(ur"ARP koordinaadid"):
        lines = page.get_lines(page.get_partially_in_rect(item.x1, item.y1, 100, item.y2))
        for line in lines:
            print line
            for crd in mapper.parsecoords(line):
                assert not ("pos" in ad)
                ad["pos"] = crd
        break
    else:
        raise Exception("No coords")
    ad["runways"] = []
    thrs = []
    freqs = []
    for pagenr in xrange(p.get_num_pages()):
        page = p.parse_page_to_items(pagenr)
        print "Parsing page", pagenr
        # Runway thresholds: slice the "RWY end coordinates" column by the
        # vertical positions of the runway designators.
        for item in page.get_by_regex("\s*RUNWAY\s*PHYSICAL\s*CHARACTERISTICS\s*"):
            print "Phys char"
            coords, = page.get_by_regex_in_rect("RWY end coordinates", 0, item.y2, 100, 100)
            design, = page.get_by_regex_in_rect("Designations", 0, item.y2, 100, 100)
            lines = page.get_lines(page.get_partially_in_rect(0, design.y2, design.x2, 100))
            print "Design", lines
            rwys = []
            for line in lines:
                m = re.match("(\d{2})", line)
                if m:
                    print "rwynum", line
                    # Line objects carry layout coordinates (y1).
                    rwys.append((m.groups()[0], line.y1))
            # Sentinel so the last runway's slice extends to the bottom.
            rwys.append((None, 100))
            for (rwy, y), (nextrwy, nexty) in izip(rwys, rwys[1:]):
                lines = page.get_lines(page.get_partially_in_rect(coords.x1, y, coords.x2, nexty - 0.5))
                lines = [line for line in lines if line.strip()]
                print "Lines for rwy", lines
                # Column layout: THR lat/lon, RWY-end lat/lon, geoid
                # undulation ("GUND").
                thrlat, thrlon, endlat, endlon, undulation = lines[:5]
                assert undulation.count("GUND")
                thrs.append(dict(pos=mapper.parse_coords(thrlat, thrlon), thr=rwy))
        print thrs
        # ATS AIRSPACE parsing copied from the FI parser but disabled here.
        if 0:
            for item in page.get_by_regex("ATS AIRSPACE"):
                lines = iter(page.get_lines(page.get_partially_in_rect(0, item.y2 + 0.1, 100, 100)))
                spaces = []
                while True:
                    line = lines.next()
                    # print "Read line:",line
                    if line.count("Vertical limits"):
                        break
                    m = re.match(ur".*?/\s+Designation and lateral limits\s*(.*\b(?:CTR|FIZ)\b.*?)\s*:?\s*$", line)
                    if not m:
                        m = re.match(ur"\s*(.*\b(?:CTR|FIZ)\b.*?)\s*:", line)
                        # print "Second try:",m
                    spacename, = m.groups()
                    # print "Got spacename:",spacename
                    assert spacename.strip() != ""
                    coords = []
                    while True:
                        line = lines.next()
                        # print "Further:",line
                        if line.count("Vertical limits"):
                            break
                        if not re.search(ur"[\d ]+N\s*[\d ]+E", line) and not re.search(
                            ur"circle|cent[red]{1,5}|pitkin|point", line
                        ):
                            break
                        coords.append(line)
                    areaspec = "".join(coords)
                    def fixup(m):
                        # Collapse spaces inside each lat/lon token.
                        lat, lon = m.groups()
                        return lat.replace(" ", "") + " " + lon.replace(" ", "")
                    areaspec = re.sub(ur"([\d ]+N)\s*([\d ]+E)", fixup, areaspec)
                    # print "Fixed areaspec",areaspec
                    # if icao=="EFKS":
                    #     areaspec=areaspec.replace("6615 28N","661528N")
                    # Error! REstriction areas!
                    spaces.append(dict(name=spacename, type="CTR", points=mapper.parse_coord_str(areaspec)))
                    if line.count("Vertical limits"):
                        # print "Breaking"
                        break
                while not line.count("Vertical limits"):
                    line = lines.next()
                # print "Matching veritcal limits--------------------------------"
                oldspaces = spaces
                spaces = []
                for space in oldspaces:
                    if space["name"].count("/"):
                        a, b = space["name"].split("/")
                        spaces.append(dict(space, name=a.strip()))
                        spaces.append(dict(space, name=b.strip()))
                    else:
                        spaces.append(space)
                missing = set([space["name"] for space in spaces])
                while True:
                    for space in spaces:
                        # print "Matching ",space['name']," to ",line,"missing:",missing
                        for it in xrange(2):
                            cand = space["name"]
                            if it == 1:
                                if cand.count("CTR"):
                                    cand = "CTR"
                                if cand.count("FIZ"):
                                    cand = "FIZ"
                            m = re.match(ur".*%s\s*:([^,:-]*)\s*-\s*([^,:-]*)" % (cand,), line)
                            if m:
                                break
                        if len(spaces) == 1 and not m:
                            m = re.match(ur".*Vertical limits\s*(.*)\s*-\s*(.*)", line)
                        if m:
                            for lim in m.groups():
                                assert lim.count(",") == 0
                            space["floor"], space["ceiling"] = m.groups()
                            missing.remove(space["name"])
                            # print "Missing:"
                            if len(missing) == 0:
                                break
                    if len(missing) == 0:
                        break
                    line = lines.next()
        print "Parse f o n page", pagenr
        # COM frequencies: TWR and ATIS rows up to the radio-nav section.
        for item2 in page.get_by_regex(ur".*ATS\s*COMMUNICATION\s*FACILITIES.*"):
            lines = page.get_lines(page.get_partially_in_rect(0, item2.y2 + 0.1, 100, 100))
            for line in lines:
                if line.count("RADIO NAVIGATION AND LANDING AIDS"):
                    break
                print "Comm line:", line
                twr = re.match(ur"TWR.*(\d{3}\.\d{3})\b.*", line)
                if twr:
                    freqs.append(("TWR", float(twr.groups()[0])))
                atis = re.match(ur"ATIS.*(\d{3}\.\d{3})", line)
                if atis:
                    freqs.append(("ATIS", float(atis.groups()[0])))