def ek_parse_airfield(icao): # http://www.slv.dk # raise Exception("This doesn't work - you need to click through web-interface for the links to work") # url="/Dokumenter/dsweb/Get/Document-1492/EK_AD_2_%s_en.pdf"%(icao,) data, date = getdata(url, country="ek", maxcacheage=86400 * 7) if 0: pages, date = miner.parse(url, maxcacheage=86400, country="ek", usecache=True) print icao, "bytes:", len(data)
def ek_parse_airfield(icao): #http://www.slv.dk #raise Exception("This doesn't work - you need to click through web-interface for the links to work") #url="/Dokumenter/dsweb/Get/Document-1492/EK_AD_2_%s_en.pdf"%(icao,) data,date=getdata(url,country="ek",maxcacheage=86400*7) if 0: pages,date=miner.parse(url, maxcacheage=86400, country='ek',usecache=True) print icao,"bytes:",len(data)
def ep_parse_airfields(filtericao=None):
    # Collect the ICAO codes (EPxx) of Polish aerodromes from the AIP AD 1
    # index page, by locating "ICAO CODE" headings and scanning the column
    # of text directly below each one.
    #
    # NOTE(review): `filtericao` is unused in the visible code, and the
    # collected `icaos` list is neither returned nor consumed here — the
    # function appears truncated in this chunk (presumably it goes on to
    # parse each collected aerodrome). Confirm against the full file.
    pages, date = miner.parse("/aip/openp.php?id=EP_AD_1_en",
                              maxcacheage=86400 * 7,
                              country='ep',
                              usecache=True)
    icaos = []
    print "Nr pages:", len(pages)
    for nr, page in enumerate(pages):
        # Each "ICAO CODE" heading marks a column of aerodrome codes.
        for item in page.get_by_regex(ur".*\bICAO\s*CODE\b.*"):
            print "Icao", item
            # Scan everything below the heading, within the heading's x-span
            # (coordinates are in the page geometry used by `miner`).
            for icaoitem in page.get_partially_in_rect(item.x1, item.y1 + 0.1,
                                                       item.x2, 100):
                for icao in re.findall(ur"\b(EP[A-Z]{2})\b", icaoitem.text):
                    assert len(icao) == 4
                    icaos.append(icao)
def ek_parse_airfields():
    # Intended to scrape the list of Danish (EK) aerodromes from the AIP
    # AD 1.3 document at http://www.slv.dk.
    #
    # Deliberately disabled: the unconditional raise below means everything
    # after it is dead code, kept as documentation of the intended approach.
    raise Exception("This doesn't work - you need to click through web-interface for the links to work")
    pages, date = miner.parse(
        "/Dokumenter/dsweb/Get/Document-6465/EK_AD_1_3_en.pdf",
        maxcacheage=86400, country="ek", usecache=True
    )
    icaos = []
    print "Nr pages:", len(pages)
    for nr, page in enumerate(pages):
        # Headings mentioning "Aerodrome" mark the start of each listing.
        for item in page.get_by_regex(ur".*Aerodrome.*", re.UNICODE | re.IGNORECASE):
            print "Icao", item
            # Scan the column below the heading for EKxx codes.
            for icaoitem in page.get_partially_in_rect(item.x1, item.y1 + 0.1,
                                                       item.x2, 100):
                for icao in re.findall(ur"\b(EK[A-Z]{2})\b", icaoitem.text):
                    assert len(icao) == 4
                    icaos.append(icao)
def ep_parse_airfield(icao):
    # Parse the Polish (EP) AIP AD 2 page for aerodrome *icao*: extract its
    # name, ARP position and elevation from the rendered PDF geometry.
    #
    # NOTE(review): the visible code ends right after computing `elev`, and
    # `spaces`, `points`, `ctrname`, `freqs` are initialized but never used
    # here — the function appears truncated in this chunk; confirm against
    # the full file.
    spaces = []
    pages, date = miner.parse("/aip/openp.php?id=EP_AD_2_%s_en" % (icao, ),
                              maxcacheage=86400 * 7,
                              country='ep',
                              usecache=True)
    print "parsing ", icao, date
    points = None
    ctrname = None
    freqs = []
    for nr, page in enumerate(pages):
        if nr == 0:
            # Drop tiny text fragments (height <= 0.4 in page units) —
            # these are layout noise, not real content lines.
            def filter_tiny(its):
                for it in its:
                    print "Filtering:", repr(it)
                    print "size %f of <%s>." % (it.y2 - it.y1, it.text)
                    textsize = it.y2 - it.y1
                    if textsize > 0.4:
                        yield it
            # --- Aerodrome name: appears under this heading. ---
            namehdg, = page.get_by_regex(
                ur".*AERODROME\s+LOCATION\s+INDICATOR\s+AND\s+NAME.*",
                re.DOTALL)
            subs = page.get_partially_in_rect(0, namehdg.y1 + 0.5,
                                              100, namehdg.y2 + 2.5)
            allsubs = []
            for sub in subs:
                print "Item:", repr(sub)
                print "sub", repr(sub.subs)
                allsubs.extend(sub.subs)
            print "allsubs", allsubs
            lineobjs = list(filter_tiny(allsubs))
            for lineobj in lineobjs:
                line = lineobj.text.strip()
                print "line:", line
                # Skip the bare ICAO code and the English/Polish headings.
                if line == icao:
                    continue
                if re.match(ur".*AERODROME\s*LOCATION\s*INDICATOR.*", line):
                    continue
                if re.match(
                        ur".*WSKAŹNIK\s*LOKALIZACJI\s*LOTNISKA\s*I\s*NAZWA.*",
                        line):
                    continue
                # Expect "EPXX - Name"; take the part after the dash.
                m = re.match(ur"%s\s*[-]\s*([\w\s/]+)" % (icao, ), line,
                             re.UNICODE | re.DOTALL)
                name, = m.groups()
                name = name.strip()
                break
            else:
                # for/else: no line yielded a name.
                raise Exception("No name found!")
            print "Name:", name
            # --- ARP coordinates. ---
            site, = page.get_by_regex(
                ur"ARP\s*-\s*WGS-84\s*coordinates\s*and\s*site\s*at\s*AD")
            print "site:", repr(site.text.strip())
            splat = site.text.strip().split("\n")
            print "splat:", splat
            print len(splat)
            # First line is the heading itself; coordinates follow.
            poss = splat[1:]
            print "rawpos,", poss
            for line in poss:
                m = re.match(
                    ur"(\d+)°(\d+)'(\d+)''(N)\s*(\d+)°(\d+)'(\d+)''(E).*",
                    line)
                if m:
                    pos = mapper.parsecoord("".join(m.groups()))
                    break
            else:
                # for/else: no line matched the coordinate pattern.
                raise Exception("No pos found")
            # --- Elevation: take the feet value from "NNN m (NNN ft)". ---
            elevi, = page.get_by_regex(
                ur"\s*Elevation/Reference\s*temperature\s*", re.DOTALL)
            elevft, = re.match(ur".*\d+\s+m\s*\((\d+)\s+ft\).*", elevi.text,
                               re.DOTALL).groups()
            elev = float(elevft)
def ep_parse_tma():
    # Parse the Polish (EP) AIP ENR 2.1 PDF into airspace dicts: first the
    # WARSZAWA FIR boundary (page 0), then TMA/CTA table rows on later pages.
    #
    # NOTE(review): the function is visibly truncated in this chunk — it ends
    # mid-way through the row-scanning loop and `spaces`, `raws`, `found_x1`
    # are never consumed here. Confirm against the full file.
    spaces = []
    pages, date = miner.parse('/_Poland_EP_ENR_2_1_en.pdf',
                              country='ep',
                              usecache=True,
                              maxcacheage=86400 * 7)
    for nr, page in enumerate(pages):
        #if nr!=1: continue
        #print "page",nr
        #print page.items
        # Each "DESIGNATION AND LATERAL ..." heading starts one airspace entry;
        # pair each heading with the next one to bound its vertical extent.
        desigs = page.get_by_regex(ur".*DESIGNATION AND LATERAL.*", re.DOTALL)
        for desig, next in izip(desigs, desigs[1:] + [None]):
            if nr == 0:
                # --- FIR: the first page holds the WARSZAWA FIR boundary. ---
                uwagi = page.get_by_regex_in_rect(ur".*UWAGI\s*/\s*REMARKS.*",
                                                  0, desig.y2, 100, 100,
                                                  re.DOTALL)[0]
                coords = page.get_lines2(
                    page.get_partially_in_rect(0, desig.y2 + 0.5,
                                               desig.x2 + 10, uwagi.y1 - 0.5))
                raw = "\n".join(coords)
                #print "Raw:\n",raw
                # Guard: the hard-coded boundary string below is only valid for
                # this exact document revision — fail loudly if the text changes.
                d = md5.md5(raw.encode('utf8')).hexdigest()
                assert d == "f336800a8183f1360415d2afef38e9ae"
                #print "Md5-digest",d
                #/further along the state border to the point 54°36’14.03”N 019°24’15.02”E -
                raw = fixup(u""" 54°27’28.03”N 019°38’24.05”E - 54°36’14.03”N 019°24’15.02”E - 55°50’58.98”N 017°32’52.80”E - 54°54’58.84”N 015°51’52.92”E - 54°55’00.00”N 015°08’07.00”E - /from this point the arc of 30 km radius centred at point 55°04’04”N 014°44’48”E - 54°55’00”N 014°21’27”E - 54°07’38”N 014°15’17”E - 54°07’34”N 014°12’05”E - 53°59’16”N 014°14’32”E - 53°55’40”N 014°13’34”E - <hack_longway_around_border>/further along the state border to the point 542615N 0194751E """)
                ##print "rw:",raw
                fir = mapper.parse_coord_str(raw, context='poland')
                # In principle, a FIR could consist of multiple non-overlapping
                # regions; in that case this list would hold more than one list
                # of points.
                fir_context = [fir]
                #print fir
                #sys.exit(1)
                spaces.append(
                    dict(points=fir,
                         name="WARSZAWA FIR",
                         icao="EPWW",
                         floor="GND",
                         ceiling="-",
                         freqs=[],
                         type="FIR",
                         date=date))
                continue
            # --- TMA/CTA rows: look for the area name above the heading. ---
            areas = page.get_partially_in_rect(50, desig.y1 - 3, 100, desig.y1 - 0.5)
            #print "partially: <%s>"%(areas,)
            if len(areas) == 0:
                #print "Found continuation of area:",area
                pass
            else:
                lines = []
                # Scan upwards from the heading for the area name line.
                for s in reversed(page.get_lines2(areas)):
                    if s.y1 >= desig.y1:
                        break
                    if re.match("\d+ \w{3} 2[01]\d{2}", s):
                        # Looks like an AIP effective-date stamp, not a name.
                        break
                    if re.match(ur"\s*AIP\s*POLAND\s*", s):
                        # Page header — not a real area.
                        break
                    if s.count("Responsibility boundary within SECTOR"):
                        lines = []  # not a real area name
                        break
                    m = re.match(".*\d+\.?\d*\s*([\w\s()]+)\s*$", s, re.UNICODE)
                    if m:
                        print "matched name", s, "as: <%s>" % (m.groups())
                        lines = [m.groups()[0]]
                        break
                    lines.append(s.strip())
                if len(lines) == 0:
                    pass
                    #print "Continuation of area:",area
                else:
                    area = " ".join(lines)
            # NOTE(review): when no name is found above, `area` carries over
            # from the previous iteration — an area may span table chunks;
            # verify this intent against the full file.
            print "areastr:", area
            print "Parsing area\n-------------------------------------------------\n\n", area
            uwagis = page.get_by_regex_in_rect(ur".*UWAGI/REMARKS.*",
                                               0, desig.y2 + 1, 100, 100,
                                               re.DOTALL)
            # y2 = lower bound (page coords) of this entry's data rows:
            # clipped by the remarks section and by the next heading.
            y2 = 100
            if len(uwagis):
                #print "Uwagi y1:",uwagis[0].y1
                y2 = min(uwagis[0].y1 - 0.1, y2)
            if next:
                y2 = min(next.y1, y2)
                #print "next.y1",next.y1
            #print "End of desig",y2
            #print desig
            units = page.get_by_regex_in_rect(ur".*UNIT PROVIDING.*",
                                              desig.x2, desig.y1, 100, desig.y2,
                                              re.DOTALL)
            if len(units) == 0:
                continue
            unit, = units
            vertlim, = page.get_by_regex_in_rect(ur".*VERTICAL LIMITS.*",
                                                 desig.x2, desig.y1, 100, desig.y2,
                                                 re.DOTALL)
            freq, = page.get_by_regex_in_rect(ur".*FREQUENCY.*",
                                              desig.x2, desig.y1, 100, desig.y2,
                                              re.DOTALL)
            #print "Looking in ",desig.y2+0.5,y2
            # NOTE(review): this rebinds `desigs`, shadowing the list being
            # iterated above; izip() already captured the original list, so
            # the outer loop is unaffected — but the shadowing is fragile.
            desigs = page.get_partially_in_rect(0, desig.y2 + 0.5,
                                                desig.x2 + 1, y2 - 0.8)
            #print "desigs,",repr(desigs)
            # Dead code kept from the original (unfinished helper sketch,
            # neutralized as a string literal):
            """
            def clump(desigs):
                out=[]
                y1=1e30
                y2=None
                for desig in desigs:
                    if y2!=None:
                        delta=desig.y1-y2
                        if delta>
                    y1=min(desig.y1,y1)
                    y2=max(desig.y2,y2)
                    out.append(desig.text)
            """
            #last_curfreq=None
            #out=[]
            if re.match(ur".*ATS\s*SERVICES\s*DELEGATION.*", area):
                break
            raws = []
            found_x1 = None
            for sub in desigs:
                #print "\n\n-> y2",y2," cur sub:",sub.y1
                if sub.y1 >= y2:
                    break
                wholerow = page.get_lines2(
                    page.get_partially_in_rect(0, sub.y1 + 0.25,
                                               100, sub.y2 - 0.25))
                wholerowstr = " ".join(wholerow)
                #print "Parse:<%s>"%(wholerowstr,)
                if re.match(ur".*\d+\.\d+\s+[\w\s*]+CONTROL\s*AREA\s*$",
                            wholerowstr, re.UNICODE):
                    break
                if re.match(ur".*\d+\s+ATS\s*SERVICES\s*DELEGATION.*",
                            wholerowstr, re.UNICODE):
                    break
def ep_parse_tma(): spaces=[] pages,date=miner.parse('/_Poland_EP_ENR_2_1_en.pdf', country='ep',usecache=True, maxcacheage=86400*7 ) for nr,page in enumerate(pages): #if nr!=1: continue #print "page",nr #print page.items desigs=page.get_by_regex(ur".*DESIGNATION AND LATERAL.*",re.DOTALL) for desig,next in izip(desigs,desigs[1:]+[None]): if nr==0: #FIR uwagi=page.get_by_regex_in_rect(ur".*UWAGI\s*/\s*REMARKS.*", 0,desig.y2,100,100,re.DOTALL)[0] coords=page.get_lines2(page.get_partially_in_rect( 0,desig.y2+0.5,desig.x2+10,uwagi.y1-0.5)) raw="\n".join(coords) #print "Raw:\n",raw d=md5.md5(raw.encode('utf8')).hexdigest() assert d=="f336800a8183f1360415d2afef38e9ae" #print "Md5-digest",d #/further along the state border to the point 54°36’14.03”N 019°24’15.02”E - raw=fixup(u""" 54°27’28.03”N 019°38’24.05”E - 54°36’14.03”N 019°24’15.02”E - 55°50’58.98”N 017°32’52.80”E - 54°54’58.84”N 015°51’52.92”E - 54°55’00.00”N 015°08’07.00”E - /from this point the arc of 30 km radius centred at point 55°04’04”N 014°44’48”E - 54°55’00”N 014°21’27”E - 54°07’38”N 014°15’17”E - 54°07’34”N 014°12’05”E - 53°59’16”N 014°14’32”E - 53°55’40”N 014°13’34”E - <hack_longway_around_border>/further along the state border to the point 542615N 0194751E """) ##print "rw:",raw fir=mapper.parse_coord_str(raw,context='poland') fir_context=[fir]#In principle, a FIR could consist of multiple non-overlapping regions. In this case, the list here would contain more than one list of points #print fir #sys.exit(1) spaces.append( dict( points=fir, name="WARSZAWA FIR", icao="EPWW", floor="GND", ceiling="-", freqs=[], type="FIR", date=date )) continue areas=page.get_partially_in_rect(50,desig.y1-3,100,desig.y1-0.5) #print "partially: <%s>"%(areas,) if len(areas)==0: #print "Found continuation of area:",area pass else: lines=[] for s in reversed(page.get_lines2(areas)): if s.y1>=desig.y1: break if re.match("\d+ \w{3} 2[01]\d{2}",s): break if re.match(ur"\s*AIP\s*POLAND\s*",s): #not real area. 
break if s.count("Responsibility boundary within SECTOR"): lines=[] #not real area name break m=re.match(".*\d+\.?\d*\s*([\w\s()]+)\s*$",s,re.UNICODE) if m: print "matched name",s,"as: <%s>"%(m.groups()) lines=[m.groups()[0]] break lines.append(s.strip()) if len(lines)==0: pass #print "Continuation of area:",area else: area=" ".join(lines) print "areastr:",area print "Parsing area\n-------------------------------------------------\n\n",area uwagis=page.get_by_regex_in_rect(ur".*UWAGI/REMARKS.*", 0,desig.y2+1,100,100,re.DOTALL) y2=100 if len(uwagis): #print "Uwagi y1:",uwagis[0].y1 y2=min(uwagis[0].y1-0.1,y2) if next: y2=min(next.y1,y2) #print "next.y1",next.y1 #print "End of desig",y2 #print desig units=page.get_by_regex_in_rect(ur".*UNIT PROVIDING.*", desig.x2,desig.y1,100,desig.y2,re.DOTALL) if len(units)==0: continue unit,=units vertlim,=page.get_by_regex_in_rect(ur".*VERTICAL LIMITS.*", desig.x2,desig.y1,100,desig.y2,re.DOTALL) freq,=page.get_by_regex_in_rect(ur".*FREQUENCY.*", desig.x2,desig.y1,100,desig.y2,re.DOTALL) #print "Looking in ",desig.y2+0.5,y2 desigs=page.get_partially_in_rect(0,desig.y2+0.5,desig.x2+1,y2-0.8) #print "desigs,",repr(desigs) """ def clump(desigs): out=[] y1=1e30 y2=None for desig in desigs: if y2!=None: delta=desig.y1-y2 if delta> y1=min(desig.y1,y1) y2=max(desig.y2,y2) out.append(desig.text) """ #last_curfreq=None #out=[] if re.match(ur".*ATS\s*SERVICES\s*DELEGATION.*",area): break raws=[] found_x1=None for sub in desigs: #print "\n\n-> y2",y2," cur sub:",sub.y1 if sub.y1>=y2: break wholerow=page.get_lines2(page.get_partially_in_rect(0,sub.y1+0.25,100,sub.y2-0.25)) wholerowstr=" ".join(wholerow) #print "Parse:<%s>"%(wholerowstr,) if re.match(ur".*\d+\.\d+\s+[\w\s*]+CONTROL\s*AREA\s*$",wholerowstr,re.UNICODE): break if re.match(ur".*\d+\s+ATS\s*SERVICES\s*DELEGATION.*",wholerowstr,re.UNICODE): break