#Swedish AIP ENR 1.1: extract the "Mountainous area of Sweden" polygon.
#Relies on Parser, mapper and re from the enclosing extraction package.
def parse_mountain_area():
    p = Parser("/AIP/ENR/ENR%201/ES_ENR_1_1_en.pdf")
    #alongborder="610213N 0114917E - 632701N 0114917E - 661457N 0141140E - 682200N 0173441E - 683923N 0183004E - 683141N 0194631E - 690945N 0202604E - 683533N 0221411E - 680424N 0233833E - 670159N 0240734E - 663602N 0240455E - "
    areas = []
    for pagenr in xrange(p.get_num_pages()):
        #print "Processing page %d"%(pagenr,)
        page = p.parse_page_to_items(pagenr)
        lines = page.get_lines(page.get_all_items())
        allofit = " ".join(lines)

        allofit = allofit.replace(
            u"along the Swedish/Norwegian and Swedish/Finnish border to",
            u"Along the common X/Y state boundary to")
        allofit = allofit.replace(u"–", "-")

        coordarea = re.match(
            ur".*Mountainous\s+area\s+of\s+Sweden.{1,10}lateral\s+limits(.*?)AIRAC.*",
            allofit)
        if coordarea:
            points = []
            txt, = coordarea.groups()
            print "area:<", txt, ">"
            points = mapper.parse_coord_str(txt, context="sweden")
            assert (len(points) > 3)
            print "Point:", len(points)
            areas.append(
                dict(name="Mountainous Area",
                     floor="GND",
                     ceiling="UNL",
                     points=points,
                     type="mountainarea",
                     freqs=[]))
    print len(areas)
    assert len(areas) == 1
    return areas
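
# Usage sketch (an assumption, not part of the original module): with the
# enclosing extraction package importable (Parser, mapper, re), the single
# returned area can be inspected like this:
if __name__ == '__main__':
    area, = parse_mountain_area()  # exactly one area, per the assert above
    print "%s: %s - %s (%d points)" % (
        area['name'], area['floor'], area['ceiling'], len(area['points']))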
Example #2
#Split a raw AIP text blob into areas and yield one dict per area; the
#helpers (splitareas, is_alt, parse_alt, parse_alts, fix_circle, mapper)
#come from the enclosing module.
def parse_areas(areas, atype):
    areas = splitareas(areas.split("\n"))
    points = []

    for area in areas:
        name = area[0].strip()
        assert len(name)
        if len(area) > 2 and is_alt(area[-2]):
            ceiling = parse_alt(area[-2])
            floor = parse_alt(area[-1])
            areapart = area[1:-2]
        else:
            try:
                floor, ceiling = parse_alts(area[-1])
                areapart = area[1:-1]
            except:
                floor, ceiling = "UNK", "UNK"
                areapart = area[1:]

        coords = "-".join([r.replace(" ", "").strip() for r in areapart if not r.startswith("*")])
        if coords.count("RADIUS"):
            coords = fix_circle(coords)
        # print "name:",name,"coords:",coords
        yield dict(
            name=unicode(name, "utf8"),
            type=atype,
            floor=floor,
            freqs=[],
            ceiling=ceiling,
            points=mapper.parse_coord_str(coords),
        )
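
# Usage sketch (hedged): parse_areas is a generator, so it is consumed in a
# loop; dump_areas below is a hypothetical helper, not part of the module:
def dump_areas(rawtext, atype="R"):
    for space in parse_areas(rawtext, atype):
        print space['name'], space['floor'], "-", space['ceiling']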
Example #5
    def emit():
        if name == None:
            raise Exception("Area is missing name")
        if ceiling == None or floor == None:
            raise Exception("Area is missing floor or ceiling")
        cd = " - ".join(coords)

        ret = dict(floor=floor,
                   ceiling=ceiling,
                   freqs=[],
                   type="segel",
                   name=name + " glider sector",
                   points=mapper.parse_coord_str(cd))
        return ret
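
# Context sketch: emit() is a closure; name, floor, ceiling and coords are
# free variables filled in by an enclosing line-scanning loop that is not
# shown here. An assumed outline of that loop:
#
#   name = floor = ceiling = None
#   coords = []
#   for line in lines:
#       ...detect sector name, vertical limits and coordinate fragments...
#   sectors.append(emit())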
Example #7
#Estonian AIP ENR 2.1: parse the TMA pages, then append the TALLIN FIR
#boundary by hand.
def ee_parse_tma():
    def fixgote(raw):
        return raw

    p = parse.Parser(r"/2012-03-08/pdf/EE-ENR-2.1.pdf", fixgote, country='ee')

    res = []
    for pagenr in xrange(1, p.get_num_pages()):
        parsed = parse_page(p, pagenr)
        res.extend(parsed)

    res.append(
        dict(name="TALLIN FIR",
             icao="EETT",
             floor='GND',
             ceiling='-',
             freqs=[],
             type='FIR',
             date=datetime(2011, 3, 25),
             points=mapper.parse_coord_str("""                
        592818N 0280236E -
        Along the common Estonian/X state boundary to 573100N 0272000E -
        Along the common Estonian/X state boundary to 575300N 0242200E -
        575228N 0242124E-
        575502N 0241540E-575357N 0241234E-
        575357N 0233604E-574658N 0233855E-
        574011N 0233456E-573538N 0232422E-
        573511N 0231051E-574208N 0225957E-
        574650N 0225428E-575627N 0224227E-
        575539N 0223501E-574645N 0220836E-
        574458N 0215458E-574547N 0215034E-
        574712N 0214300E-575124N 0213848E-
        575342N 0213648E-580700N 0212900E-
        582448N 0203834E-590000N 0210000E-
        595300N 0245100E-595430N 0252000E-
        595300N 0255200E-595200N 0255830E-
        593642N 0273812E-592818N 0280236E
        """,
                                           context='estonia')))
    return res
Example #8
#Finnish AIP: parse EF TRA/TSA areas from ENR 5.2 and P/R/D areas from
#ENR 5.1.
def fi_parse_restrictions():
    spaces=[]
    
    p=parse.Parser("/ais/eaip/pdf/enr/EF_ENR_5_2_EN.pdf",lambda x: x,country='fi')
    for pagenr in xrange(p.get_num_pages()):
        page=p.parse_page_to_items(pagenr)
        headings=list(page.get_by_regex(ur"EF T[RS]A \d+"))+[None]        
        for tra,next in izip(headings,headings[1:]):
            y1=tra.y2+0.1
            if next:
                y2=next.y1-0.1
            else:
                y2=100
                
            o=[]
            for line in page.get_lines(page.get_partially_in_rect(
                                            0,y1,100,y2)):
                line=line.strip()
                if line.endswith("clock-"):
                    line=line.rstrip("-")
                line=line.replace("to the point  -","to the point ")
                print "Eval",line
                if line=="":break
                o.append(line)
            print "AREA:<","".join(o),">"
            kind,number=re.match("EF (T[RS]A) (\d+)",tra.text).groups()            
            
            spaces.append(dict(
                name="EF %s %s"%(kind,number),
                points=mapper.parse_coord_str("".join(o),context="finland"),
                ceiling="UNL",
                floor="GND",
                type="TSA",
                freqs=[]
                    ))

        
    p=parse.Parser("/ais/eaip/pdf/enr/EF_ENR_5_1_EN.pdf",lambda x: x,country='fi')
    for pagenr in xrange(p.get_num_pages()):        
        page=p.parse_page_to_items(pagenr)
        raws=list(sorted(page.get_by_regex(ur"(?:EF [PRD]\d+[A-Z]{0,2} .*)|(?:.*Tunnus, nimi ja sivurajat.*)"),key=lambda x:x.y1))+[None]
        for cur,next in izip(raws[:-1],raws[1:]):
            if cur.text.count("Tunnus, nimi ja sivurajat"): continue #not a real airspace
            space=dict()
            if next==None:
                y2=100
            else:
                y2=next.y1-1.75
            name=cur.text.strip()
            space['name']=name
            if name.startswith("EF R28"):
                continue #This airspace is special, and not supported now (it's the no-mans-land-zone on border to russia!)
        
            areaspecprim=page.get_lines(page.get_partially_in_rect(cur.x1+0.01,cur.y2+0.05,cur.x1+50,y2))
            areaspec=[]
            for area in areaspecprim:
                if len(areaspec) and area.strip()=="": break
                areaspec.append(area)
            print "Y-interval:",cur.y1,y2,"next:",next
            print "Name:",space['name']
            print "areaspec:",areaspec
            space['points']=mapper.parse_coord_str("".join(areaspec))
            vertitems=page.get_partially_in_rect(cur.x1+55,cur.y1+0.05,cur.x1+70,y2+1.5)
            vertspec=[x.strip() for x in page.get_lines(vertitems) if x.strip()]
            print repr(vertspec)
            assert len(vertspec)==2
            ceiling,floor=vertspec
            space['ceiling']=ceiling
            space['floor']=floor
            space['type']='R'
            space['freqs']=[]
            spaces.append(space)
            

    return spaces
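
# Usage sketch (assumes parse, mapper and izip are importable, as in the
# enclosing module):
#
#   spaces = fi_parse_restrictions()
#   print len(spaces), "Finnish TRA/TSA and P/R/D spaces"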
Example #10
#Latvian eAIP ENR 2.1 (HTML): parse the TMA/FIR tables, including unit
#call signs and frequencies.
def ev_parse_tma():
    out = []
    parser = lxml.html.HTMLParser()
    #url="/Latvia_EV-ENR-2.1-en-GB.html"
    cur_airac = get_cur_airac()
    url = "/eAIPfiles/%s-AIRAC/html/eAIP/EV-ENR-2.1-en-GB.html" % (cur_airac, )

    data, date = fetchdata.getdata(url, country='ev')
    parser.feed(data)
    tree = parser.close()

    got_fir = False
    for table in tree.xpath("//table"):
        #print "Table with %d children"%(len(table.getchildren()),)
        rows = list(table.xpath(".//tr"))
        for idx in xrange(5):
            headingrow = rows[idx]
            cols = list(headingrow.xpath(".//th"))
            #print len(cols)
            if len(cols) == 5:
                break
        else:
            raise Exception("No heading row")
        assert idx == 0
        #for idx,col in enumerate(cols):
        #    print "Col %d, %s"%(idx,alltext(col)[:10])
        nameh, unith, callsignh, freqh, remarkh = cols
        assert alltext(nameh).lower().count("name")
        assert alltext(unith).lower().count("unit")
        assert re.match(ur"call\s*sign", alltext(callsignh).lower())
        lastcols = None
        for row in rows[1:]:
            cols = list(row.xpath(".//td"))
            if len(cols) == 5:
                name, unit, callsign, freq, remark = cols
                lastcols = cols
            else:
                if lastcols:
                    unit, callsign, freq, remark = lastcols[1:]
                    name = cols[0]
                else:
                    continue

            lines = [x.strip() for x in alltext(name).split("\n") if x.strip()]
            if len(lines) == 0: continue
            spacename = lines[0].strip()

            if re.match(ur"RIGA\s*UTA|RIGA\s*CTA|RIGA\s*AOR.*", spacename):
                continue
            freqstr = alltext(freq)
            callsignstr = alltext(callsign)
            if freqstr.strip():
                print freqstr
                freqmhzs = re.findall(ur"\d{3}\.\d{3}", freqstr)
                assert len(freqmhzs) <= 2
                callsigns = [callsignstr.split("\n")[0].strip()]
                freqs = []
                for idx, freqmhz in enumerate(freqmhzs):
                    if freqmhz == '121.500': continue
                    freqs.append((callsigns[idx], float(freqmhz)))
                print "freqs:", freqs
            else:
                freqs = []
            assert len(lines)

            classidx = next(idx for idx, x in reversed(list(enumerate(lines)))
                            if x.lower().count("class of airspace"))

            if re.match(ur"RIGA\s*FIR.*UIR", spacename, re.UNICODE):
                got_fir = True
                lastspaceidx = classidx - 2
                floor = "GND"
                ceiling = "-"
                type_ = "FIR"
            else:
                if lines[classidx - 1].count("/") == 1:
                    floor, ceiling = lines[classidx - 1].split("/")
                    lastspaceidx = classidx - 1
                else:
                    floor = lines[classidx - 1]
                    ceiling = lines[classidx - 2]
                    lastspaceidx = classidx - 2
                ceiling = strangefix(ceiling)
                floor = strangefix(floor)

                mapper.parse_elev(ceiling)
                mapper.parse_elev(floor)
                type_ = "TMA"
            tcoords = lines[1:lastspaceidx]
            #verify that we got actual altitudes:
            coords = []
            for coord in tcoords:
                coord = coord.strip().replace("(counter-)", "").replace(
                    "(RIGA DVOR - RIA)", "")
                if coord.endswith(u"E") or coord.endswith("W"):
                    coord = coord + " -"
                coords.append(coord)

            raw = " ".join(coords)
            raw = re.sub(
                s(ur"Area bounded by lines successively joining the following points:"
                  ), "", raw)
            print "Raw:", raw
            coords = mapper.parse_coord_str(raw, context='latvia')
            for cleaned in clean_up_polygon(coords):
                out.append(
                    dict(name=spacename,
                         points=cleaned,
                         type=type_,
                         freqs=freqs,
                         floor=floor,
                         url=url,
                         date=date,
                         ceiling=ceiling))
                if type_ == 'FIR':
                    out[-1]['icao'] = "EVRR"
    return out
#Swedish AIP AD: build the airfield list (positions, elevations, runways,
#frequencies, ATS airspace) plus named holding and entry/exit points.
def extract_airfields(filtericao=lambda x:True,purge=True):
    #print getxml("/AIP/AD/AD 1/ES_AD_1_1_en.pdf")
    ads=[]
    p=Parser("/AIP/AD/AD 1/ES_AD_1_1_en.pdf")
    points=dict()
    startpage=None
    for pagenr in xrange(p.get_num_pages()):
        page=p.parse_page_to_items(pagenr)
        if page.count("Aerodrome directory"):
            startpage=pagenr
            break
    if startpage==None:
        raise Exception("Couldn't find aerodrome directory in file")
    #print "Startpage: %d"%(startpage,)
    #nochartf=open("nochart.txt","w")
    for pagenr in xrange(startpage,p.get_num_pages()):
        row_y=[]
        page=p.parse_page_to_items(pagenr)
        allines=[x for x in (page.get_lines(page.get_partially_in_rect(0,0,15,100))) if x.strip()]
        for item,next in zip(allines,allines[1:]+[""]):
            #print "item:",item
            
            m=re.match(ur"^\s*[A-ZÅÄÖ]{3,}(?:/.*)?\b.*",item)
            if m:
                #print "Candidate, next is:",next
                if re.match(r"^\s*[A-Z]{4}\b.*",next):
                    #print "Matched:",item
                    #print "y1:",item.y1                    
                    row_y.append(item.y1)
        for y1,y2 in zip(row_y,row_y[1:]+[100.0]):
            #print "Extacting from y-range: %f-%f"%(y1,y2)
            items=list(page.get_partially_in_rect(0,y1-0.25,5.0,y2+0.25,ysort=True))
            if len(items)>=2:
                #print "Extract items",items
                ad=dict(name=unicode(items[0].text).strip(),
                        icao=unicode(items[1].text).strip()                    
                        )
                #print "Icao:",ad['icao']
                assert re.match(r"[A-Z]{4}",ad['icao'])
                if not filtericao(ad): continue
                if len(items)>=3:
                    #print "Coord?:",items[2].text
                    m=re.match(r".*(\d{6}N)\s*(\d{7}E).*",items[2].text)
                    if m:
                        lat,lon=m.groups()
                        ad['pos']=parse_coords(lat,lon)           
                        #print "Items3:",items[3:]   
                        elev=re.findall(r"(\d{1,5})\s*ft"," ".join(t.text for t in items[3:]))
                        #print "Elev:",elev
                        assert len(elev)==1
                        ad['elev']=int(elev[0])                        
                                     
                ads.append(ad)

        
    big_ad=set()        
    for ad in ads:
        if not ad.has_key('pos'):
            big_ad.add(ad['icao'])
            
    for ad in ads:        
        icao=ad['icao']
        if icao in big_ad:            
            if icao in ['ESIB','ESNY','ESCM','ESPE']:
                continue                    
            
            try:
                p=Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_6_1_en.pdf"%(icao,icao))
            except:
                p=Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_6-1_en.pdf"%(icao,icao))

            ad['aipvacurl']=p.get_url()
            for pagenr in xrange(p.get_num_pages()):
                page=p.parse_page_to_items(pagenr)
                
                """
                for altline in exitlines:
                    m=re.match(r"(\w+)\s+(\d+N)\s*(\d+E.*)",altline)
                    if not m: continue
                    name,lat,lon=m.groups()
                    try:
                        coord=parse_coords(lat,lon)
                    except Exception:
                        continue
                    points.append(dict(name=name,pos=coord))
                """
                
                for kind in xrange(2):
                    if kind==0:
                        hits=page.get_by_regex(r"H[Oo][Ll][Dd][Ii][Nn][Gg]")
                        kind="holding point"
                    if kind==1:
                        hits=page.get_by_regex(r"[Ee]ntry.*[Ee]xit.*point")                    
                        kind="entry/exit point"
                    if len(hits)==0: continue
                    for holdingheading in hits:

                        items=sorted(page.get_partially_in_rect(holdingheading.x1+2.0,holdingheading.y2+0.1,holdingheading.x1+0.5,100),
                            key=lambda x:x.y1)
                        items=[x for x in items if not x.text.startswith(" ")]
                        #print "Holding items:",items
                        for idx,item in enumerate(items):
                            print "Holding item",item
                            y1=item.y1
                            if idx==len(items)-1:
                                y2=100
                            else:
                                y2=items[idx+1].y1
                            items2=[x for x in page.get_partially_in_rect(item.x1+1,y1+0.3,item.x1+40,y2-0.1) if x.x1>=item.x1-0.25 and x.y1>=y1-0.05 and x.y1<y2-0.05]
                            s=(" ".join(page.get_lines(items2))).strip()
                            print "Holding lines:",repr(page.get_lines(items2))
                            #if s.startswith("ft Left/3"): #Special case for ESOK
                            #    s,=re.match("ft Left/3.*?([A-Z]{4,}.*)",s).groups()
                            #m=re.match("ft Left/\d+.*?([A-Z]{4,}.*)",s)
                            #if m:
                            #    s,=m.groups()
                                
                            if s.startswith("LjUNG"): #Really strange problem with ESCF
                                s=s[0]+"J"+s[2:]
                            if s.lower().startswith("holding"):
                                sl=s.split(" ",1)
                                if len(sl)>1:
                                    s=sl[1]
                            s=s.strip()
                            if kind=="entry/exit point" and s.startswith("HOLDING"):
                                continue #reached HOLDING-part of VAC
                                
                            #Check for other headings
                            #Fixup strange formatting of points in some holding items: (whitespace between coord and 'E')                            
                            s=re.sub(ur"(\d+)\s*(N)\s*(\d+)\s*(E)",lambda x:"".join(x.groups()),s)

                            m=re.match(r"([A-Z]{2,}).*?(\d+N)\s*(\d+E).*",s)
                            if not m:                                
                                m=re.match(r".*?(\d+N)\s*(\d+E).*",s) 
                                if not m:
                                    continue
                                assert m
                                lat,lon=m.groups()
                                #skavsta
                                if icao=="ESKN":
                                    if s.startswith(u"Hold north of T"):
                                        name="NORTH"
                                    elif s.startswith(u"Hold south of B"):
                                        name="SOUTH"                     
                                    else:
                                        assert 0
                                #add more specials here            
                                else:
                                    continue
                            else:
                                name,lat,lon=m.groups()
                            try:
                                coord=parse_coords(lat,lon)
                            except Exception:
                                print "Couldn't parse:",lat,lon
                                continue
                            #print name,lat,lon,mapper.format_lfv(*mapper.from_str(coord))
                            
                            if name.count("REMARK") or len(name)<=2:
                                print "Suspicious name: ",name
                                #sys.exit(1)
                                continue
                            points[icao+' '+name]=dict(name=icao+' '+name,icao=icao,pos=coord,kind=kind)


    #for point in points.items():
    #    print point


    #sys.exit(1)

    def fixhex11(s):
        #replace control characters (other than TAB/LF/CR) with spaces;
        #some AIP PDFs contain stray low bytes that derail text extraction
        out=[]
        for c in s:
            i=ord(c)
            if i>=0x20:
                out.append(c)
                continue
            if i in [0x9,0xa,0xd]:
                out.append(c)
                continue
            out.append(' ')
            
        return "".join(out)
        
    for ad in ads:
        icao=ad['icao']
        if icao in big_ad:
            #print "Parsing ",icao
            p=Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_en.pdf"%(icao,icao),loadhook=fixhex11)
            ad['aiptexturl']=p.get_url()
            firstpage=p.parse_page_to_items(0)
            te="\n".join(firstpage.get_all_lines())                        
            #print te
            coords=re.findall(r"ARP.*(\d{6}N)\s*(\d{7}E)",te)
            if len(coords)>1:
                raise Exception("First page of airport info (%s) does not contain exactly ONE set of coordinates"%(icao,))
            if len(coords)==0:
                print "Couldn't find coords for ",icao
            #print "Coords:",coords
            ad['pos']=parse_coords(*coords[0])

            elev=re.findall(r"Elevation.*?(\d{1,5})\s*ft",te,re.DOTALL)
            if len(elev)>1:
                raise Exception("First page of airport info (%s) does not contain exactly ONE elevation in ft"%(icao,))
            if len(elev)==0:
                print "Couldn't find elev for ",icao                
            ad['elev']=int(elev[0])
            freqs=[]
            found=False
            thrs=[]
            #uprint("-------------------------------------")
            for pagenr in xrange(p.get_num_pages()):
                page=p.parse_page_to_items(pagenr)
                #uprint("Looking on page %d"%(pagenr,))
                if 0: #opening hours are no longer stored in a separate document for any airports. No need to detect which any more (since none are).
                    for item in page.get_by_regex(".*OPERATIONAL HOURS.*"):
                        lines=page.get_lines(page.get_partially_in_rect(0,item.y2+0.1,100,100))
                        for line in lines:
                            things=["ATS","Fuelling","Operating"]
                            if not line.count("AIP SUP"): continue
                            for thing in things:
                                if line.count(thing):
                                    ad['aipsup']=True
                        
                    
                for item in page.get_by_regex(".*\s*RUNWAY\s*PHYSICAL\s*CHARACTERISTICS\s*.*"):
                    #uprint("Physical char on page")
                    lines=page.get_lines(page.get_partially_in_rect(0,item.y2+0.1,100,100))
                    seen_end_rwy_text=False
                    for line,nextline in izip(lines,lines[1:]+[None]):
                        #uprint("MAtching: <%s>"%(line,))
                        if re.match(ur"AD\s+2.13",line): break
                        if line.count("Slope of"): break
                        if line.lower().count("end rwy:"): seen_end_rwy_text=True
                        if line.lower().count("bgn rwy:"): seen_end_rwy_text=True
                        m=re.match(ur".*(\d{6}\.\d+)[\s\(\)\*]*(N).*",line)
                        if not m:continue
                        m2=re.match(ur".*(\d{6,7}\.\d+)\s*[\s\(\)\*]*(E).*",nextline)                            
                        if not m2:continue
                        latd,n=m.groups()
                        lond,e=m2.groups()
                        assert n=="N"
                        assert e=="E"
                        lat=latd+n
                        lon=lond+e
                        rwytxts=page.get_lines(page.get_partially_in_rect(0,line.y1+0.05,12,nextline.y2-0.05))
                        uprint("Rwytxts:",rwytxts)
                        rwy=None
                        for rwytxt in rwytxts:
                            #uprint("lat,lon:%s,%s"%(lat,lon))
                            #uprint("rwytext:",rwytxt)
                            m=re.match(ur"\s*(\d{2}[LRCM]?)\b.*",rwytxt)
                            if m:
                                assert rwy==None
                                rwy=m.groups()[0]
                        if rwy==None and seen_end_rwy_text:
                            continue
                        print "Cur airport:",icao
                        already=False
                        assert rwy!=None
                        seen_end_rwy_text=False
                        for thr in thrs:
                            if thr['thr']==rwy:
                                raise Exception("Same runway twice on airfield:"+icao)
                        thrs.append(dict(pos=mapper.parse_coords(lat,lon),thr=rwy))
            assert len(thrs)>=2
            for pagenr in xrange(0,p.get_num_pages()):
                page=p.parse_page_to_items(pagenr)                                              
                
                matches=page.get_by_regex(r".*ATS\s+COMMUNICATION\s+FACILITIES.*")
                #print "Matches of ATS COMMUNICATION FACILITIES on page %d: %s"%(pagenr,matches)
                if len(matches)>0:
                    commitem=matches[0]
                    curname=None
                    
                    callsign=page.get_by_regex_in_rect(ur"Call\s*sign",0,commitem.y1,100,commitem.y2+8)[0]
                    
                    
                    for idx,item in enumerate(page.get_lines(page.get_partially_in_rect(callsign.x1-0.5,commitem.y1,100,100),fudge=0.3,order_fudge=15)):
                        if item.strip()=="":
                            curname=None
                        if re.match(".*RADIO\s+NAVIGATION\s+AND\s+LANDING\s+AIDS.*",item):
                            break
                        #print "Matching:",item
                        m=re.match(r"(.*?)\s*(\d{3}\.\d{1,3})\s*MHz.*",item)
                        #print "MHZ-match:",m
                        if not m: continue
                        #print "MHZ-match:",m.groups()
                        who,sfreq=m.groups()
                        freq=float(sfreq)
                        if abs(freq-121.5)<1e-4:
                            if who.strip():
                                curname=who
                            continue #Ignore emergency frequency, it is understood
                        if not who.strip():
                            if curname==None: continue
                        else:
                            curname=who
                        freqs.append((curname.strip().rstrip("/"),freq))


            for pagenr in xrange(0,p.get_num_pages()):
                page=p.parse_page_to_items(pagenr)                                              
                                
                matches=page.get_by_regex(r".*ATS\s*AIRSPACE.*")
                #print "Matches of ATS_AIRSPACE on page %d: %s"%(pagenr,matches)
                if len(matches)>0:
                    heading=matches[0]
                    desigitem,=page.get_by_regex("Designation and lateral limits")
                    vertitem,=page.get_by_regex("Vertical limits")
                    airspaceclass,=page.get_by_regex("Airspace classification")
                    
                    lastname=None
                    subspacelines=dict()
                    subspacealts=dict()
                    for idx,item in enumerate(page.get_lines(page.get_partially_in_rect(desigitem.x2+1,desigitem.y1,100,vertitem.y1-1))):
                        
                        if item.count("ATS airspace not established"):
                            assert idx==0
                            break
                            
                        if item.strip()=="": continue
                        m=re.match(r"(.*?)(\d{6}N\s+.*)",item)
                        if m:
                            name,coords=m.groups()                            
                            name=name.strip()
                        else:
                            name=item.strip()
                            coords=None
                        if name:
                            lastname=name
                        if coords:
                            subspacelines.setdefault(lastname,[]).append(coords)
                        assert lastname
                    lastname=None

                    #print "Spaces:",subspacelines
                    #print "ICAO",ad['icao']
                    #altlines=page.get_lines(page.get_partially_in_rect(vertitem.x2+1,vertitem.y1,100,airspaceclass.y1-0.2))
                    
                    #print "Altlines:",altlines
                    subspacealts=dict()
                    subspacekeys=subspacelines.keys()
                        
                    allaltlines=" ".join(page.get_lines(page.get_partially_in_rect(vertitem.x1+0.5,vertitem.y1+0.5,100,airspaceclass.y1-0.2)))
                    single_vertlim=False
                    totalts=list(mapper.parse_all_alts(allaltlines))
                    #print "totalts:",totalts 
                    if len(totalts)==2:
                        single_vertlim=True
                    
                    for subspacename in subspacekeys:
                        ceil=None
                        floor=None
                        subnames=[subspacename]
                        if subspacename.split(" ")[-1].strip() in ["TIA","TIZ","CTR","CTR/TIZ"]:
                            subnames.append(subspacename.split(" ")[-1].strip())
                        #print "Parsing alts for ",subspacename,subnames
                        try:                        
                            for nametry in subnames:
                                if single_vertlim: #there's only one subspace, parse all of vertical limits field for this single one.
                                    items=[vertitem]
                                else:
                                    items=page.get_by_regex_in_rect(nametry,vertitem.x2+1,vertitem.y1,100,airspaceclass.y1-0.2)
                                for item in items: 
                                    alts=[]
                                    for line in page.get_lines(page.get_partially_in_rect(item.x1+0.5,item.y1+0.5,100,airspaceclass.y1-0.2)):
                                        #print "Parsing:",line
                                        line=line.replace(nametry,"").lower().strip()
                                        parsed=list(mapper.parse_all_alts(line))
                                        if len(parsed):
                                            alts.append(mapper.altformat(*parsed[0]))
                                        if len(alts)==2: break
                                    if alts:
                                        #print "alts:",alts
                                        ceil,floor=alts
                                        raise StopIteration
                        except StopIteration:
                            pass
                        assert ceil and floor
                        subspacealts[subspacename]=dict(ceil=ceil,floor=floor)             
                        
                    spaces=[]                                        
                    for spacename in subspacelines.keys():
                        altspacename=spacename
                        #print "Altspacename: %s, subspacesalts: %s"%(altspacename,subspacealts)
                        space=dict(
                            name=spacename,
                            ceil=subspacealts[altspacename]['ceil'],
                            floor=subspacealts[altspacename]['floor'],
                            points=parse_coord_str(" ".join(subspacelines[spacename])),
                            freqs=list(set(freqs))
                            )
                        
                        if True:
                            vs=[]
                            #note: 'p' below rebinds the Parser name above;
                            #harmless only because the page loop breaks once
                            #'found' is set
                            for p in space['points']:
                                x,y=mapper.latlon2merc(mapper.from_str(p),13)
                                vs.append(Vertex(int(x),int(y)))
                            p=Polygon(vvector(vs))
                            #sanity check: degenerate polygons (less than
                            #roughly 30x30 mercator units at zoom 13) mean
                            #the coordinate parse went wrong
                            assert p.calc_area()>30*30
                            #print "Area: %f"%(p.calc_area(),)
                        
                        spaces.append(space)
                        #print space
                    ad['spaces']=spaces
                    found=True
                if found:
                    break
            assert found                            
            ad['runways']=rwy_constructor.get_rwys(thrs)
                            
                            
            #Now find any ATS-airspace
    chartblobnames=[]
    for ad in ads:        
        icao=ad['icao']
        if icao in big_ad:          
            parse_landing_chart.help_plc(ad,"/AIP/AD/AD 2/%s/ES_AD_2_%s_2-1_en.pdf"%(icao,icao),
                            icao,ad['pos'],"se",variant="")
            parse_landing_chart.help_plc(ad,"/AIP/AD/AD 2/%s/ES_AD_2_%s_6-1_en.pdf"%(icao,icao),
                            icao,ad['pos'],"se",variant="vac")

            parse_landing_chart.help_plc(ad,"/AIP/AD/AD 2/%s/ES_AD_2_%s_2-3_en.pdf"%(icao,icao),
                            icao,ad['pos'],"se",variant="parking")
            
            
            #aip_text_documents.help_parse_doc(ad,"/AIP/AD/AD 2/%s/ES_AD_2_%s_6_1_en.pdf"%(icao,icao),
            #            icao,"se",title="General Information",category="general")
                                    
            
            aip_text_documents.help_parse_doc(ad,"/AIP/AD/AD 2/%s/ES_AD_2_%s_en.pdf"%(icao,icao),
                        icao,"se",title="General Information",category="general")
            
                  

    
    #if purge:
    #    parse_landing_chart.purge_old(chartblobnames,country="se")        
    
    #sys.exit(1)

    for extra in extra_airfields.extra_airfields:
        if filtericao(extra):
            ads.append(extra)
    print
    print
    for k,v in sorted(points.items()):
        print k,v,mapper.format_lfv(*mapper.from_str(v['pos']))
        
    #print "Num points:",len(points)
    
    origads=list(ads)    
    for flygkartan_id,name,lat,lon,dummy in csv.reader(open("fplan/extract/flygkartan.csv"),delimiter=";"):
        found=None
        lat=float(lat)
        lon=float(lon)
        if type(name)==str:
            name=unicode(name,'utf8')
        mercf=mapper.latlon2merc((lat,lon),13)
        for a in origads:
            merca=mapper.latlon2merc(mapper.from_str(a['pos']),13)
            dist=math.sqrt((merca[0]-mercf[0])**2+(merca[1]-mercf[1])**2)
            if dist<120:
                found=a
                break
        if found:
            found['flygkartan_id']=flygkartan_id
        else:
            d=dict(
                    icao='ZZZZ',
                    name=name,
                    pos=mapper.to_str((lat,lon)),
                    elev=int(get_terrain_elev((lat,lon))),
                    flygkartan_id=flygkartan_id)
            if filtericao(d):
                ads.append(d)
                    
    minor_ad_charts=extra_airfields.minor_ad_charts
        
                    
    for ad in ads:     
        if ad['name'].count(u"Långtora"):            
            ad['pos']=mapper.to_str(mapper.from_aviation_format("5944.83N01708.20E"))
            
        if ad['name'] in minor_ad_charts:
            charturl=minor_ad_charts[ad['name']]
            arp=ad['pos']
            if 'icao' in ad and ad['icao'].upper()!='ZZZZ':
                icao=ad['icao'].upper()
            else:
                icao=ad['fake_icao']
                
            parse_landing_chart.help_plc(ad,charturl,icao,arp,country='raw',variant="landing")
            """
            assert icao!=None
            lc=parse_landing_chart.parse_landing_chart(
                    charturl,
                    icao=icao,
                    arppos=arp,country="raw")
            assert lc
            if lc:
                ad['adcharturl']=lc['url']
                ad['adchart']=lc
            """
            
    #print ads
    for ad in ads:
        print "%s: %s - %s (%s ft) (%s)"%(ad['icao'],ad['name'],ad['pos'],ad['elev'],ad.get('flygkartan_id','inte i flygkartan'))
        for space in ad.get('spaces',[]):
            for freq in space.get('freqs',[]):
                print "   ",freq
        #if 'spaces' in ad:
        #    print "   spaces: %s"%(ad['spaces'],)
        #if 'aiptext' in ad:
        #    print "Aip texts:",ad['aiptext']
        #else:
        #    print "No aiptext"
    print "Points:"
    for point in sorted(points.values(),key=lambda x:x['name']):
        print point
        
    f=codecs.open("extract_airfields.regress.txt","w",'utf8')    
    for ad in ads:
        r=repr(ad)
        d=md5.md5(r).hexdigest()
        f.write("%s - %s - %s\n"%(ad['icao'],ad['name'],d))
    f.close()
    f=codecs.open("extract_airfields.regress-details.txt","w",'utf8')    
    for ad in ads:
        r=repr(ad)
        f.write(u"%s - %s - %s\n"%(ad['icao'],ad['name'],r))
    f.close()
    
    return ads,points.values()
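
# Usage sketch (an assumption): the filter receives the partially built
# airfield dict, so Swedish fields can be selected by ICAO prefix:
if __name__ == '__main__':
    ads, points = extract_airfields(
        filtericao=lambda ad: ad.get('icao', 'ZZZZ').startswith('ES'))
    print len(ads), "airfields,", len(points), "named points"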
Example #12
#Latvian eAIP (HTML): parse restricted/reserved-area tables from the page
#at url.
def ev_parse_x(url):
    out = []
    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(url, country="ev")
    parser.feed(data)
    tree = parser.close()
    got_fir = False
    for table in tree.xpath("//table"):
        # print "Table with %d children"%(len(table.getchildren()),)
        rows = list(table.xpath(".//tr"))

        # for idx,col in enumerate(cols):
        #    print "Col %d, %s"%(idx,alltext(col)[:10])
        headingcols = rows[0].xpath(".//th")
        if len(headingcols) == 0:
            continue
        name, alt = headingcols[0:2]
        if alltext(name).count("QNH") and len(headingcols) > 6:
            continue
        print alltext(name)
        assert alltext(name).lower().count("name") or alltext(name).lower().count("lateral")
        print alltext(alt)
        assert alltext(alt).lower().count("limit")

        for row in rows[1:]:
            cols = list(row.xpath(".//td"))
            if len(cols) < 2:
                continue
            name, alt = cols[:2]
            lines = [x.strip() for x in alltext(name).split("\n") if x.strip()]
            if len(lines) == 0:
                continue
            assert len(lines)

            spacename = lines[0].strip()
            if spacename.strip() == "A circle radius 0,5 NM centered on 565705N 0240619E EVR2 RIGA":
                spacename = "EVR2 RIGA"
                lines = [spacename, lines[0][: -len(spacename)].strip()] + lines[1:]
            print spacename
            if spacename.strip() == "SKRIVERI":
                continue
            print "Spacename is:", spacename
            assert (
                spacename[:3] in ["EVR", "EVP", "TSA", "TRA"]
                or spacename.endswith("ATZ")
                or spacename.endswith("ATZ (MILITARY)")
            )

            altcand = []
            for altc in alltext(alt).split("\n"):
                if altc.count("Real-time"):
                    continue
                altcand.append(altc.strip())
            print "Altcands:", altcand
            ceiling, floor = [x.strip() for x in " ".join(altcand).split("/")]
            ceiling = strangefix(ceiling)
            floor = strangefix(floor)

            mapper.parse_elev(ceiling)
            ifloor = mapper.parse_elev(floor)
            iceiling = mapper.parse_elev(ceiling)
            if ifloor >= 9500 and iceiling >= 9500:
                continue
            assert ifloor < iceiling

            freqs = []
            raw = " ".join(lines[1:])
            raw = re.sub(s(ur"Area bounded by lines successively joining the following points:"), "", raw)
            print "Raw:", raw

            coords = mapper.parse_coord_str(raw, context="latvia")
            for cleaned in clean_up_polygon(coords):
                out.append(
                    dict(
                        name=spacename,
                        points=cleaned,
                        type="R",
                        freqs=freqs,
                        floor=floor,
                        url=url,
                        date=date,
                        ceiling=ceiling,
                    )
                )

    return out
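
# Usage sketch: ev_parse_x() takes the URL of one eAIP HTML page; the URL
# below is an assumed example modelled on ev_parse_tma above, not a
# confirmed document path:
#
#   url = "/eAIPfiles/%s-AIRAC/html/eAIP/EV-ENR-5.1-en-GB.html" % (get_cur_airac(),)
#   spaces = ev_parse_x(url)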
Example #13
#Swedish AIP ENR 2.1: parse every TMA and ACC-sector page, then append the
#SWEDEN FIR boundary by hand.
def parse_all_tma():
    def fixgote(raw):
        #Fix illogical composition of Göteborg TMA description. 2010 04 02
        did_replace = [0]

        def replacer(args):
            uprint(args.groups())
            y, x, w, h, font = args.groups()
            uprint(w, h)
            assert int(w) >= 260 and int(w) < 420
            assert int(h) >= 6 and int(h) <= 15
            f = float(w) / 270.0
            x1 = x
            y1 = y
            w1 = 80
            h1 = h

            x2 = 168 * f
            y2 = y
            w2 = 150 * f
            h2 = h
            did_replace[0] += 1
            repl = """<text top="%s" left="%s" width="%s" height="%s" font="%s">Part of GÖTEBORG TMA</text>
                           <text top="%s" left="%s" width="%s" height="%s" font="%s">584558N 0122951E - 584358N 0130950E - </text>""" % (
                y1, x1, w1, h1, font, y2, x2, w2, h2, font)
            uprint("\n======================================\nReplacement:\n",
                   repl)
            return repl

        raw = re.sub(
            r"""<text top="(\d+)" left="(\d+)" width="(\d+)" height="(\d+)" font="(\d+)">\s*Part of GÖTEBORG TMA  584558N 0122951E - 584358N 0130950E - </text>""",
            replacer, raw)
        assert did_replace[0] == 1
        return raw

    p = parse.Parser("/AIP/ENR/ENR 2/ES_ENR_2_1_en.pdf")

    res = []
    found = False
    last_sector = dict()
    for pagenr in xrange(0, p.get_num_pages()):
        page = p.parse_page_to_items(pagenr)
        #print "Num acc-sec:",len(page.get_by_regex(r".*ACC.sectors.*"))
        #print "Num and acc-sec:",len(page.get_by_regex(r".*and\s+ACC.sectors.*"))

        sect = (len(page.get_by_regex(r".*ACC.sectors.*")) > 0
                and len(page.get_by_regex(r".*and\s+ACC.sector.*")) == 0)
        #print "ACC-sector2:",sect
        if found or page.get_by_regex(r".*Terminal Control Areas.*") or sect:
            found = True
        else:
            continue
        #if sect:
        parsed = parse_page(p,
                            pagenr,
                            "TMA" if not sect else "sector",
                            last_sector=last_sector)
        res.extend(parsed)

    res.append(
        dict(name="SWEDEN FIR",
             icao="ESAA",
             floor='GND',
             ceiling='-',
             freqs=[],
             type='FIR',
             date=datetime(2011, 4, 9),
             points=mapper.parse_coord_str("""
690336N 0203255E - 
Along the common X/Y state boundary to 653148N 0240824E -
644100N 0225500E - 633700N 0213000E -
632830N 0204000E - 631000N 0201000E -
614000N 0193000E - 610000N 0191905E -
601803N 0190756E - 601130N 0190512E -
593346N 0195859E - 591524N 0203239E -
590000N 0210000E - 573410N 0200900E -
570000N 0195000E - 555100N 0173300E -
545500N 0155200E - 545500N 0150807E -
clockwise along an arc centred on 550404N 0144448E and with radius 16.2 NM -
545500N 0142127E - 545500N 0125100E -
552012N 0123827E - Along the common X/Y state boundary to 561253N 0122205E -
583000N 0103000E - 584540N 0103532E -
585332N 0103820E - Along the common X/Y state boundary to 690336N 0203255E
                                        
""",
                                           context="sweden")))

    for pa in res:
        pretty(pa)
    return res
Example #14
def parse_page(parser,pagenr,kind="TMA",last_sector=dict()):   
    if kind=="TMA":
        thirdcols=["ATC unit","AFIS unit"]
    elif kind=="sector":
        thirdcols=["FREQ"]
    elif kind=="R":
        thirdcols=["Remarks (nature of hazard,"]
    else:
        raise Exception("Bad kind")
    page=parser.parse_page_to_items(pagenr)
    items=page.items
    #print "Items:",pitems    

    #print "Possible Areas:"
    headings=[]
    for item in items:        
        if item.text==None: continue
        item.text=item.text.strip()
        if item.text=="": continue
        if item.text=="Name": continue
        if item.y1<25 and item.text in (["Lateral limits","Vertical limits"]+thirdcols):
            headings.append(item)  
    
    headings.sort(key=lambda x:x.x1)    
    #print "found candidates:",zone_candidates    
    if len(headings)==0:
        return []
    avg_heading_y=sum(h.y1 for h in headings)/float(len(headings))
    uprint("Found headings:",headings)
    zone_candidates=[]
    for item in items:        
        if item.text==None or item.text.strip()=="": continue
        if item.text.strip().startswith("AMDT"): continue
        if item.text.strip().startswith("The LFV Group"): continue
        if re.match(ur"\s*LFV\s*AIRAC\s*AMDT\s*\d+/\d+\s*",item.text): continue
        if item.text.strip()=="LFV": continue
        if item.text.count('Terminal Information Areas'): continue
        if item.text.strip().startswith("AIRAC"): continue        
        if kind=="R" and not is_r_or_danger_area_name(item.text.strip()):
            continue
        if item.y1>avg_heading_y+1 and item.x1<12 and not item.text in ["Name",'None',"LFV"]:
            if item.text.count("Established") or item.text.count(u'TROLLHÄTTAN TWR') or item.text.count(u'and/or SÅTENÄS') or item.text.count(u'TWR/TMC') or item.text.strip().endswith("TWR") or item.text.strip().endswith("TWR."):
                continue
            if item.text.count("operational hours") or item.text.count("See AIP DENMARK"):
                continue
            if item.text.count("hours of"):
                continue
            if item.text.count("Upper limit"):
                continue
            if item.text.count("that part") or item.text.count("coincides"):
                continue
            if item.text.count(u'Danger area EK D395 and') or item.text.count(u'D396 are situated within') or item.text.strip()=="TMA":
                continue
            if item.text.count(u'ÖSTGÖTA TMC is closed') or item.text.count(u'and SKAVSTA TWR is') or item.text.strip()=='open.':
                continue
            if item.text.count("SAT 0530"): 
                continue
            if item.text.strip()=='OPS': 
                continue
            if item.text.strip()==u'ÖSTGÖTA TMC:': 
                continue
            if item.text.count(u'is open') or item.text.count('is closed'):
                continue
            if item.text.count('MON-FRI') or item.text.count('2150'): 
                continue
            lines2=page.get_lines(page.get_partially_in_rect(12,item.y1+0.2,40,item.y2-0.2))
            if len(lines2):
                zone_candidates.append(item)
    
    uprint("Found cands:",zone_candidates)
    zone_candidates.sort(key=lambda x:x.y1)
    
    for zone in zone_candidates:
        #uprint("Zone:",zone)
        #assert not zone.text.count("AOR")
        assert not zone.text.count("FIR")
    
    uprint("Headings:",headings)        
    print "Pagenr:",pagenr
    assert len(headings)==3
    
    
    
    ret=[]
    for i in xrange(len(zone_candidates)):
        d=dict()
        cand=zone_candidates[i]
        if i<len(zone_candidates)-1:
            nextcand=zone_candidates[i+1]
        else:
            nextcand=None
        y1=cand.y1-0.25
        y2=100
        if nextcand: y2=nextcand.y1-0.75
        for j in xrange(len(headings)):
            head=headings[j]
            if j<len(headings)-1:
                nexthead=headings[j+1]
            else:
                nexthead=None
            x1=head.x1
            x2=head.x2
            if j==len(headings)-1:                
                x1=headings[j-1].x2+3
                x2=100
            lines=page.get_lines(page.get_partially_in_rect(x1,y1,x2,y2,xsort=True,ysort=True))
            #print ("Parsed %s y,%d-%d, %s: <%s>\n\n"%(cand.text,y1,y2,head.text,lines)).encode('utf8')
            d[head.text]=lines        
        
        if kind=="R":
            if y2==100: y2=y1+3
            d['name']=" ".join(x.strip() for x in filter_head_foot(page.get_lines(page.get_partially_in_rect(0,y1,10,y2,xsort=True,ysort=True))))
        else:
            d['name']=cand.text.strip()
        ret.append(d)  


    allow_head=2
    print "Doing fixups--------------------------------------------------"
    tret=[]
    for x in ret:
        #print "Fixing up",x,"allow:",allow_head
        area="".join(x['Lateral limits']).strip()
        if allow_head==2 and area!="" and x['name'].strip()!="":
            allow_head=1
            
        if allow_head!=1:
            if len(tret):
                tret[-1]['Lateral limits']+=x['Lateral limits']
                tret[-1]['Vertical limits']+=x['Vertical limits']
        else:
            tret.append(x)
        
        if allow_head==1:
            allow_head=0
                
        if not area.endswith('-') and area!="":
            allow_head=2
            
        #print "   Fixed up up",x
    ret=tret
    for line in ret:
        print "Fixed:",line['name']," = ",line['Lateral limits'],line['Vertical limits']
    out=[]
    for d in ret:
        pa=dict()
        curname=d['name']
        if curname.count(u'Förteckning över'): continue
        print "D:",d
        arealines=[l for l in d['Lateral limits'] if l.strip()!=""]
        last_coord_idx=None
        #uprint("D:<%s> (area:%s)"%(d,arealines))
        if 'FREQ' in d:
            freqs=[("SWEDEN CONTROL",float(x)) for x in re.findall(r"\d{3}\.\d{3}","\n".join(d['FREQ']))]
            #print "Parsed freqs:",freqs
            if freqs:
                last_sector['freqs']=freqs
            
        if kind=='sector':            
            m=re.match(r"ES[A-Z]{2}\s*ACC\s*sector\s*([0-9a-zA-Z]*)",d['name'])
            if m:
                last_sector['major']=d['name']
                last_sector['majorsector'],=m.groups()
            if len(arealines)==0:
                last_sector['name']=d['name']
                continue
            
            if d['name'].count("Control Area and Upper Control Area"): continue        
            if d['name'].count("SUECIA CTA"): continue        
            if d['name'].count("SUECIA UTA"): continue
            
            m=re.match(r"([0-9a-zA-Z]*)(:.*)",d['name'])
            if m and 'majorsector' in last_sector:
                sectorname,sub=m.groups()
                if sectorname==last_sector['majorsector']:
                    d['name']=last_sector['major']+sub
                    #uprint("Fixed up name: ",d['name'])
        #print "Arealines:",arealines
        assert len(arealines)
        if arealines[0].strip()=="Danger area EK D395 and D396 are":
            arealines=arealines[1:]
        if arealines[0].strip()=="situated within TMA":
            arealines=arealines[1:]
            
        #arealines is a list; compare its joined text (the original compared
        #the list itself to a string, which was never true)
        if " ".join(arealines).strip() in (u'Förteckning över CTA / Lists of CTA','Lateral limits'):
            continue

        for idx in xrange(len(arealines)):
            if arealines[idx].lower().startswith("established"):
                last_coord_idx=idx
                pa['established']=" ".join(l for l in arealines[idx:])   
                break
            if arealines[idx].lower().startswith("danger area"):
                last_coord_idx=idx
                break
            if arealines[idx].strip()=="LFV":
                last_coord_idx=idx
                break
        if last_coord_idx==None:
            last_coord_idx=len(arealines)
        #uprint("ARealines:",arealines)
        #uprint("Last coord:",arealines[last_coord_idx-1])
        if len(arealines)>last_coord_idx:
            if arealines[last_coord_idx-1:last_coord_idx+1]==[u'571324N 0161129E -', u'Established during operational hours of']:
                arealines[last_coord_idx-1]=arealines[last_coord_idx-1].strip("-")
        #uprint("Last fixed:",arealines[last_coord_idx-1])
        assert not arealines[last_coord_idx-1].strip().endswith("-")
        #for idx in xrange(last_coord_idx-1):
        #    print "arealine: <%s>"%(arealines[idx].strip(),)
        #    assert arealines[idx].strip().endswith("-") or arealines[idx].strip().endswith("to")
        
        vertlim=u" ".join(d['Vertical limits'])
        if vertlim.strip()=="":
            #print "Object with no vertical limits: %s"%(repr(d['name']),)
            continue
        
        if d['name']=='Control Area':
            continue

        uprint("Vertlim: ",vertlim)
        heightst=re.findall(r"(FL\s*\d{3})|(\d+\s*ft\s*(?:\s*/\s*\d+\s*.\s*GND)?(?:\s*GND)?)|(GND)|(UNL)",vertlim)
        uprint("Height candidates:",heightst)
        heights=[]
        for fl,ht,gnd,unl in heightst:
            if fl:
                heights.append(fl)
            if ht:
                heights.append(ht.strip())
            if gnd:
                heights.append(gnd.strip())
            if unl:
                heights.append(unl.strip())
        uprint("heights for ",d['name'],":",repr(heights))
        if len(heights)==0 and d['name']==u'GÖTEBORG TMA':
            heights=['GND','FL95']
        if len(heights)==1 and d['name']==u'Göteborg TMA':
            heights=['4500','FL95']
        assert len(heights)==2
        ceiling=heights[0].strip()
        floor=heights[1].strip()
                
        pa['name']=d['name']
        pa['floor']=floor
        pa['ceiling']=ceiling
        if mapper.parse_elev(floor)>=9500:
            continue
        #uprint("Arealines:\n================\n%s\n============\n"%(arealines[:last_coord_idx]))
        #print pa
        areacoords=" ".join(arealines[:last_coord_idx])
        pa['points']=parse_coord_str(areacoords)
        
        
        vs=[]
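        # Sanity check: project every point to mercator pixels (zoom 13) and
        # require the polygon to cover more than 30x30 pixels, rejecting
        # degenerate or mis-parsed areas.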
        for p in pa['points']:
            #print "from_str:",repr(p)
            x,y=mapper.latlon2merc(mapper.from_str(p),13)
            vs.append(Vertex(int(x),int(y)))

        p=Polygon(vvector(vs))
        if p.calc_area()<=30*30:
            pass#print pa
            #print "Area:",p.calc_area()
        assert p.calc_area()>30*30
        #print "Area: %f"%(p.calc_area(),)
        #print "Point-counts:",len(pa['points'])
        for p in pa['points']:
            assert p.count(",")==1 
        pa['type']=kind
        for thirdcol in thirdcols:
            if thirdcol in d:
                atc=d[thirdcol]
                break
        else:
            raise Exception("missing thirdcol")
        #print "ATc: <%s>"%(repr(atc),)
        freqs=[(y,float(x)) for x,y in re.findall(r"(\d{3}\.\d{3})\s*MHz\n(.*)","\n".join(atc))]
        if not freqs:
            freqs=last_sector.get('freqs',[])
        #print repr(freqs)
        pa['freqs']=freqs
        #uprint("Cleaning up ",pa['name'])
        for cleaned in clean_up_polygon(list(pa['points'])):
            d=dict(pa)
            #print "cleaned",cleaned
            for i,tup in enumerate(cleaned):
                assert type(tup)==str
                latlon=mapper.from_str(tup)
                lat,lon=latlon
                assert lat>=-85 and lat<=85
            d['points']=cleaned
            #uprint("cleaned:",pa['name'],len(cleaned),cleaned)
            #print "name:",d['name']
            #print "cleaned points:",d['points']
            #print "from:",areacoords
            #raise Exception()
            out.append(d)
        #if pa['name'].lower().count("esrange"):
        #    print "Exit esrange"
        #    sys.exit(1)
                    
    return out
Beispiel #15
0
def fi_parse_tma():
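    # Parse Finnish TMAs and ATS areas from the EF_ENR_2_1 AIP PDF. Each TMA
    # polygon is subtracted from the ATS polygons below, so the resulting
    # spaces never overlap, and the FINLAND FIR polygon is appended last.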
    p = parse.Parser(r"/ais/eaip/pdf/enr/EF_ENR_2_1_EN.pdf",
                     fixuphref,
                     country='fi')

    res = []
    atsres = []
    for pagenr in xrange(4, p.get_num_pages()):
        parsed, atsparsed = parse_page(p, pagenr)
        res.extend(parsed)
        atsres.extend(atsparsed)
        #break

    print "Len ouf out ", len(res)
    atsout = []
    for space in atsres:
        #print "bef cut:",space['points']
        mypolys = [makepoly.poly(space['points'])]
        for tmaitem in res:
            if tmaitem['type'] != 'TMA': continue
            outmypolys = []
            assert len(mypolys) >= 1
            for mypoly in list(mypolys):
                tmapoly = makepoly.poly(tmaitem['points'])
                #print mypoly
                #print tmapoly
                shape = mypoly.subtract(tmapoly)
                newpolys = shape.get_polys()
                if len(newpolys) > 1:
                    print "Length is:", len(newpolys)
                #print "Cutting"
                outmypolys.extend(
                    [shapemerge2d.Polygon(x) for x in list(newpolys)])
                #assert len(newpolys)==1
            if len(outmypolys) > 1:
                print "outmypolys:", outmypolys
                #print "Cut to:",mypoly
            mypolys = outmypolys

        for mypoly in mypolys:
            t = []
            for mx, my in [(v.get_x(), v.get_y())
                           for v in mypoly.get_vertices()]:
                t.append(mapper.to_str(mapper.merc2latlon((mx, my), 13)))
            #print "Aft cut:",t
            newspace = dict(space)
            newspace['points'] = t
            atsout.append(newspace)
        if len(mypolys) > 1:
            print "Space was split into ", len(mypolys), "parts"
    res.extend(atsout)

    res.append(
        dict(name="FINLAND FIR",
             icao="EFIN",
             floor='GND',
             ceiling='-',
             freqs=[],
             type='FIR',
             date=datetime(2011, 4, 9),
             points=mapper.parse_coord_str(
                 """                                   
    601130N 0190512E - 601803N 0190756E -
610000N 0191905E - 614000N 0193000E -
631000N 0201000E - 632830N 0204000E -
633700N 0213000E - 644100N 0225500E -
653148N 0240824E -
Along the common X/Y state boundary to 690336N 0203255E -
Along the common X/Y state boundary to 690307N 0285545E -
Along the common X/Y state boundary to 601201N 0271735E - 
600800N 0263300E -
595830N 0260642E - 595300N 0255200E -
595430N 0252000E - 595300N 0245100E -
590000N 0210000E - 591524N 0203239E -
593346N 0195859E - 601130N 0190512E
""",
                 context="finland")))

    #for pa in res:
    #    pretty(pa)

    return res
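
# Minimal driver sketch (an assumption, mirroring the __main__ block used for
# ep_parse_tra further down; the .get() calls avoid assuming every space dict
# carries all keys):
if __name__ == '__main__':
    for space in fi_parse_tma():
        print space.get('name'), space.get('floor'), "-", space.get('ceiling')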
Beispiel #16
0
def ee_parse_gen_r2(url):
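    # Scrape Estonian restricted (R) areas from an eAIP HTML table: the first
    # column holds the designation and lateral limits, the second the vertical
    # limits. Areas whose floor is at or above 9500 ft are skipped.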
    spaces = []
    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(url, country='ee')
    parser.feed(data)
    tree = parser.close()
    print "Parsed tree"
    for tab in tree.xpath(".//table"):
        print "Found table"
        for idx, cand in enumerate(tab.xpath(".//tr")):
            if len(cand.getchildren()) < 3:
                continue
            space = dict()
            #print list(cand.getchildren())
            what, vert, remark = list(cand.getchildren())[0:3]
            whattxt = alltext(what).replace(u"–", "-").replace(u"\xa0", " ")

            verttxt = alltext(vert)

            while True:
                w = re.sub(ur"\(.*?\)", "", whattxt)
                if w != whattxt:
                    whattxt = w
                    continue
                break

            #print idx,whattxt
            if idx < 3:
                if idx == 1:
                    assert (whattxt.count("Identification")
                            or whattxt.count("ateral limits"))
                if idx == 2: assert whattxt.strip() == "1"
                continue
            verttxt = verttxt.replace(u"\xa0", u" ")
            vertlines = [x for x in verttxt.split("\n") if x.strip()]
            if len(vertlines) == 1:
                vertlines = [x for x in verttxt.split("  ") if x.strip()]
            print "Verlintes:", repr(vertlines)
            #print "wha------------------------ t",whattxt
            space['ceiling'], space['floor'] = vertlines[:2]
            mapper.parse_elev(space['ceiling'])
            ifloor = mapper.parse_elev(space['floor'])
            if ifloor >= 9500: continue
            lines = whattxt.split("\n")
            out = []
            merged = ""
            for line in lines[1:]:
                line = line.strip().replace(u"–", "-")
                if line == "": continue
                if line.endswith("point"):
                    out.append(line + " ")
                    continue
                if line.endswith("ircle with radius of") or line.endswith(
                        ",") or line.endswith("on") or line.endswith("radius"):
                    merged = " ".join([merged, line])
                    print "<---Merged:", merged
                    continue
                if merged:
                    line = " ".join([merged, line])
                merged = ""
                if not line.endswith("-"):
                    line = line + " -"
                out.append(line + "\n")

            space['name'] = lines[0].strip()
            w = "".join(out)
            print "Parsing:", w
            if space['name'].startswith('EER1 '):
                w = ee_parse_tma2.eer1txt
                fir = mapper.parse_coord_str(ee_parse_tma2.firtxt,
                                             context='estonia')
                fir_context = [fir]
                space['points'] = mapper.parse_coord_str(
                    w, fir_context=fir_context)
            else:
                space['points'] = mapper.parse_coord_str(w, context='estonia')
            space['type'] = 'R'
            space['date'] = date
            space['freqs'] = []
            space['url'] = fetchdata.getrawurl(url, 'ee')
            spaces.append(space)
    return spaces
def ee_parse_restrictions():
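    # Parse EER/EED restricted and danger areas from a PDF. Rows are located
    # by their EER/EED designator; the vertical-limit column is found via the
    # leftmost "FT MSL"/"FL" item. For EER1 the lateral limits are spliced
    # together with the hard-coded sea border below (tseaborder).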
    spaces = []
    p = parse.Parser("/ee_restricted_and_danger.pdf",
                     lambda x: x,
                     country='ee')
    for pagenr in xrange(p.get_num_pages()):
        page = p.parse_page_to_items(pagenr)
        raws = list(
            sorted(page.get_by_regex(ur"EE[RD]\d+\s+.*"),
                   key=lambda x: x.y1)) + [None]
        if len(raws) > 1:
            elevs = page.get_by_regex(ur"\d+\s*FT\s*MSL|FL\s*\d+")
            assert elevs
            elevcol = min(elev.x1 for elev in elevs)
            assert elevcol != 100
            for cur, next in izip(raws[:-1], raws[1:]):
                #if cur.text.count("Tunnus, nimi ja sivurajat"): continue #not a real airspace
                space = dict()
                if next == None:
                    y2 = 100
                else:
                    y2 = next.y1 - 1.75
                name = cur.text.strip()
                space['name'] = name

                areaspecprim = page.get_lines(page.get_partially_in_rect(
                    cur.x1 + 0.01, cur.y2 + 0.05, elevcol - 2, y2),
                                              fudge=.25)
                #print "areaspecprim:\n","\n".join(areaspecprim)
                areaspec = []
                for area in areaspecprim:
                    print "area in ", area
                    area = area.replace(u"–", "-")
                    if len(areaspec) and area.strip() == "": break
                    area = re.sub(ur"\w-$", "", area)
                    areaspec.append(area)
                #print "Y-interval:",cur.y1,y2,"next:",next
                #print "Name:",space['name']
                #print "areaspec:",areaspec
                inp = " ".join(areaspec)
                #print inp
                #raw_input()

                tpoints = mapper.parse_coord_str(inp, context='estonia')
                if name.startswith("EER1"):
                    tseaborder = "592842N 0280054E - 593814N 0273721E - 593953N 0265728E - 594513N 0264327E"
                    seapoints = mapper.parse_coord_str(tseaborder)
                    cont = None
                    points = []

                    def close(a, b):
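                        # Two coordinate strings count as the same corner when
                        # they are less than 1.0 apart (presumably NM, the
                        # unit mapper.bearing_and_distance appears to return).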
                        bearing, dist = mapper.bearing_and_distance(
                            mapper.from_str(a), mapper.from_str(b))
                        #print (a,b),dist
                        return dist < 1.0

                    for idx, point in enumerate(tpoints):
                        points.append(point)
                        if close(point, seapoints[0]):
                            print "WAS CLOSE", point, seapoints[0]
                            points.extend(seapoints[1:-1])
                            for idx2, point in enumerate(tpoints[idx + 1:]):
                                if close(point, seapoints[-1]):
                                    points.extend(tpoints[idx + 1 + idx2:])
                                    break
                            else:
                                raise Exception("Couldn't find seaborder end")
                            break
                    else:
                        raise Exception("Couldn't find seaborder")
                else:
                    points = tpoints
                space['points'] = points
                vertitems = page.get_partially_in_rect(elevcol, cur.y1 + 0.05,
                                                       elevcol + 8, y2 + 1.5)
                vertspec = []
                for v in page.get_lines(vertitems):
                    if v.strip() == "": continue
                    if v.strip().count("Lennuliiklusteeninduse AS"):
                        continue
                    vertspec.append(v.strip())

                print "vertspec:", vertspec
                assert len(vertspec) == 2
                ceiling, floor = vertspec

                if mapper.parse_elev(floor) >= 9500 and mapper.parse_elev(
                        ceiling) >= 9500:
                    continue

                space['ceiling'] = ceiling
                space['floor'] = floor
                space['type'] = 'R'
                space['freqs'] = []
                spaces.append(space)

    spaces.append(
        dict(name="EE TSA 1",
             ceiling="UNL",
             floor="5000 FT GND",
             points=mapper.parse_coord_str(u""" 
            594500N 0255000E – 594500N 0261800E – 
            592100N 0265800E – 591200N 0261200E – 
            591600N 0255400E – 594500N 0255000E"""),
             type="TSA",
             date=datetime(2011, 03, 25),
             freqs=[]))
    return spaces
Beispiel #18
0
                        #print "freqname Matched:",line
                        fname, = g.groups()
                        fname = fname.strip()
                        break
                if not fname:
                    raise Exception("Found no frequency name for freq: " +
                                    freq)
                freqs.append((fname, float(freq)))
            if len(freqs): break

        (ceiling, ceilingy), (floor, floory) = verts
        assert ceilingy < floory
        assert floory - ceilingy < 5.0
        uprint("Analyzing area for %s" % (name, ))
        assert "".join(areaspec).strip() != ""
        area = mapper.parse_coord_str("".join(areaspec), context='estonia')
        uprint("Done analyzing %s" % (name, ))
        #print area
        if name.count("CTA") and name.count("TMA") == 0:
            type_ = "CTA"
        else:
            type_ = "TMA"

        if re.match(ur"\s*TALLINN\s*TMA\s*1\s*", name):
            out.append(
                dict(name="TALLIN TMA 2",
                     floor='1700 ft MSL',
                     ceiling='3500 ft MSL',
                     freqs=freqs,
                     type='TMA',
                     points=mapper.parse_coord_str("""                
def ee_parse_airfield(icao=None):
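    # Parse one Estonian airfield AIP PDF (/ee_<icao>.pdf): airfield name,
    # elevation, ARP position and runway thresholds. The "if 0:" block below
    # is disabled ATS-airspace and frequency parsing, kept for reference.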
    spaces = []
    ad = dict()
    ad["icao"] = icao
    sigpoints = []
    p = parse.Parser("/ee_%s.pdf" % (icao,), lambda x: x, country="ee")

    page = p.parse_page_to_items(0)
    print icao
    nameregex = ur".*%s\s*[-−]\s*([A-ZÅÄÖ\- ]{3,})" % (icao,)
    for item in page.get_by_regex(nameregex):
        print "fontsize:", item.fontsize
        assert item.fontsize >= 10
        ad["name"] = re.match(nameregex, item.text).groups()[0].strip()
        break
    else:
        raise Exception("Found no airfield name!")

    for item in page.get_by_regex(ur".*Kõrgus merepinnast.*"):
        lines = page.get_lines(page.get_partially_in_rect(0, item.y1 + 0.1, 100, item.y2 - 0.1))
        for line in lines:
            ft, = re.match(".*?([\d\.]+)\s*FT\.*", line).groups()
            assert not "elev" in ad
            print "parsed ft:", ft
            ad["elev"] = float(ft)

    for item in page.get_by_regex(ur"ARP koordinaadid"):
        lines = page.get_lines(page.get_partially_in_rect(item.x1, item.y1, 100, item.y2))
        for line in lines:
            print line
            for crd in mapper.parsecoords(line):
                assert not ("pos" in ad)
                ad["pos"] = crd
                break
            else:
                raise Exception("No coords")
    ad["runways"] = []
    thrs = []
    freqs = []
    for pagenr in xrange(p.get_num_pages()):
        page = p.parse_page_to_items(pagenr)
        print "Parsing page", pagenr
        for item in page.get_by_regex("\s*RUNWAY\s*PHYSICAL\s*CHARACTERISTICS\s*"):
            print "Phys char"

            coords, = page.get_by_regex_in_rect("RWY end coordinates", 0, item.y2, 100, 100)

            design, = page.get_by_regex_in_rect("Designations", 0, item.y2, 100, 100)

            lines = page.get_lines(page.get_partially_in_rect(0, design.y2, design.x2, 100))
            print "Design", lines
            rwys = []
            for line in lines:
                m = re.match("(\d{2})", line)
                if m:
                    print "rwynum", line
                    rwys.append((m.groups()[0], line.y1))
            rwys.append((None, 100))
            for (rwy, y), (nextrwy, nexty) in izip(rwys, rwys[1:]):
                lines = page.get_lines(page.get_partially_in_rect(coords.x1, y, coords.x2, nexty - 0.5))
                lines = [line for line in lines if line.strip()]
                print "Lines for rwy", lines
                thrlat, thrlon, endlat, endlon, undulation = lines[:5]
                assert undulation.count("GUND")

                thrs.append(dict(pos=mapper.parse_coords(thrlat, thrlon), thr=rwy))
            print thrs

    if 0:

        for item in page.get_by_regex("ATS AIRSPACE"):
            lines = iter(page.get_lines(page.get_partially_in_rect(0, item.y2 + 0.1, 100, 100)))
            spaces = []
            while True:
                line = lines.next()
                # print "Read line:",line
                if line.count("Vertical limits"):
                    break
                m = re.match(ur".*?/\s+Designation and lateral limits\s*(.*\b(?:CTR|FIZ)\b.*?)\s*:?\s*$", line)
                if not m:
                    m = re.match(ur"\s*(.*\b(?:CTR|FIZ)\b.*?)\s*:", line)
                    # print "Second try:",m

                spacename, = m.groups()
                # print "Got spacename:",spacename
                assert spacename.strip() != ""
                coords = []
                while True:
                    line = lines.next()
                    # print "Further:",line
                    if line.count("Vertical limits"):
                        break
                    if not re.search(ur"[\d ]+N\s*[\d ]+E", line) and not re.search(
                        ur"circle|cent[red]{1,5}|pitkin|point", line
                    ):
                        break
                    coords.append(line)

                areaspec = "".join(coords)

                def fixup(m):
                    lat, lon = m.groups()
                    return lat.replace(" ", "") + " " + lon.replace(" ", "")

                areaspec = re.sub(ur"([\d ]+N)\s*([\d ]+E)", fixup, areaspec)
                # print "Fixed areaspec",areaspec
                # if icao=="EFKS":
                #    areaspec=areaspec.replace("6615 28N","661528N")
                # Error! REstriction areas!
                spaces.append(dict(name=spacename, type="CTR", points=mapper.parse_coord_str(areaspec)))
                if line.count("Vertical limits"):
                    # print "Breaking"
                    break
            while not line.count("Vertical limits"):
                line = lines.next()
            # print "Matching veritcal limits--------------------------------"
            oldspaces = spaces
            spaces = []
            for space in oldspaces:
                if space["name"].count("/"):
                    a, b = space["name"].split("/")
                    spaces.append(dict(space, name=a.strip()))
                    spaces.append(dict(space, name=b.strip()))
                else:
                    spaces.append(space)
            missing = set([space["name"] for space in spaces])
            while True:
                for space in spaces:
                    # print "Matching ",space['name']," to ",line,"missing:",missing
                    for it in xrange(2):
                        cand = space["name"]
                        if it == 1:
                            if cand.count("CTR"):
                                cand = "CTR"
                            if cand.count("FIZ"):
                                cand = "FIZ"
                        m = re.match(ur".*%s\s*:([^,:-]*)\s*-\s*([^,:-]*)" % (cand,), line)
                        if m:
                            break
                    if len(spaces) == 1 and not m:
                        m = re.match(ur".*Vertical limits\s*(.*)\s*-\s*(.*)", line)
                    if m:
                        for lim in m.groups():
                            assert lim.count(",") == 0
                        space["floor"], space["ceiling"] = m.groups()
                        missing.remove(space["name"])
                    # print "Missing:"
                    if len(missing) == 0:
                        break
                if len(missing) == 0:
                    break
                line = lines.next()

        print "Parse f o n page", pagenr
        for item2 in page.get_by_regex(ur".*ATS\s*COMMUNICATION\s*FACILITIES.*"):
            lines = page.get_lines(page.get_partially_in_rect(0, item2.y2 + 0.1, 100, 100))
            for line in lines:
                if line.count("RADIO NAVIGATION AND LANDING AIDS"):
                    break
                print "Comm line:", line
                twr = re.match(ur"TWR.*(\d{3}\.\d{3})\b.*", line)
                if twr:
                    freqs.append(("TWR", float(twr.groups()[0])))
                atis = re.match(ur"ATIS.*(\d{3}\.\d{3})", line)
                if atis:
                    freqs.append(("ATIS", float(atis.groups()[0])))
Beispiel #20
0
def ee_parse_tma2():
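    # Scrape Estonian TMAs and the Tallinn FIR from the eAIP ENR 2.1 HTML
    # page. Only top-level (non-nested) tables are considered; TALLINN TMA 2
    # gets a hard-coded 20 NM circle instead of its textual lateral limits.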
    spaces = []
    airac_date = get_airac_date()
    url = "/%s/html/eAIP/EE-ENR-2.1-en-GB.html" % (airac_date, )
    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(url, country='ee')
    parser.feed(data)
    tree = parser.close()
    icaos = []

    def nested(tab):
        if tab == None: return False
        if tab.getparent() is None:
            return False
        #print dir(tab)
        if tab.tag == 'table':
            return True
        return nested(tab.getparent())

    for tab in tree.xpath(".//table"):
        print "table alltext:", alltext(tab)
        if nested(tab.getparent()): continue
        firsttr = tab.xpath(".//tr")[0]
        ntext = alltext(firsttr)
        print "firsttr", firsttr
        print "ntext", ntext
        if re.match(ur".*FIR\s*/\s*CTA.*", ntext):
            print "Matches Tallin FIR"
            name = 'TALLIN FIR'
            points = mapper.parse_coord_str(firtxt, context='estonia')
            floor, ceiling = "GND", "FL195"
            space = {}
            space['name'] = name
            space['points'] = points
            space['floor'] = floor
            space['ceiling'] = ceiling
            space['freqs'] = []
            space['icao'] = 'EETT'
            space['type'] = 'FIR'
            space['date'] = date
            space['url'] = fetchdata.getrawurl(url, 'ee')
            spaces.append(space)
            continue
        else:
            name = ntext.strip()
        space = dict(name=name)
        print "Name", name
        assert space['name'].count("TMA") \
            or space['name'].count("FIR")
        if space['name'].count("FIR"):
            type = 'FIR'
        else:
            type = "TMA"
        freqs = []
        points = None
        floor = None
        ceiling = None
        for cand in tab.xpath(".//tr"):
            if len(cand.getchildren()) != 2:
                continue
            nom, what = cand.getchildren()
            whattxt = alltext(what)
            nomtxt = alltext(nom)
            print "nomtxt", nomtxt, "space name", space['name']
            if nomtxt.count("Lateral limits"):
                if space['name'].count("TALLINN TMA 2"):
                    points = mapper.parse_coord_str("""                
                        A circle with radius 20 NM centred on 592448N 0244957E
                        """)
                else:
                    whattxt = whattxt.replace(
                        "then along the territory dividing line between Estonia and Russia to",
                        "- Along the common Estonian/X state boundary to ")
                    print "Fixed up", whattxt
                    points = mapper.parse_coord_str(whattxt, context='estonia')
            if nomtxt.count("Vertical limits"):
                floor, ceiling = whattxt.split(" to ")
            if nomtxt.count("Call sign"):
                callsign = whattxt.split("\n")[0]
            if nomtxt.count("freq"):
                freqs.extend(re.findall(ur"\d+\.\d+\s*MHz", whattxt))

        assert points and floor and ceiling
        space['points'] = points
        space['type'] = type
        space['floor'] = floor
        space['ceiling'] = ceiling
        space['freqs'] = []
        space['date'] = date
        space['url'] = fetchdata.getrawurl(url, 'ee')
        for freq in freqs:
            space['freqs'].append((callsign, freq))
        spaces.append(space)
    return spaces
Beispiel #21
0
def parse_page(parser,pagenr):   
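    #Parse one page of an ENR 2.1 AIP PDF: find major/minor TMA and MIL CTA
    #headings, then walk down the page collecting lateral limits, vertical
    #limits and ATS frequencies for each coordinate block.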
    page=parser.parse_page_to_items(pagenr)
    items=page.items
    minx=min([item.x1 for item in items])
    headings=[]
    majorre=ur"\s*([A-ZÅÄÖ ][A-ZÅÄÖ]{3,})\s+(?:TMA\s*\d*|MIL CTA)\s*(?:-.*)?$"
    minorre=ur"\s*(?:TMA|MIL CTA [SN]?)\s*[A-ZÅÄÖ ]*\s*"
    for item in page.get_by_regex(majorre):
        m,=re.match(majorre,item.text).groups()
        assert m!=None
        assert m.strip()!=""
        headings.append(('major',item.text.strip(),m,item))
    for item in page.get_by_regex(minorre):
        m=re.match(minorre,item.text).group()
        assert m!=None
        assert m.strip()!=""
        #print "Heading %d: %s"%(item.y1,m)
        headings.append(('minor',item.text.strip(),m,item))
    #print headings
    headings.sort(key=lambda x:x[3].y1)
    def findheadingfor(y,meta=None):
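        #Return the heading in force at vertical position y: the nearest major
        #heading above y, combined with the nearest minor heading above it when
        #one exists.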
        minor=None
        major=None
        for (kind,full,name,item) in reversed(headings):
            if minor==None and kind=="minor" and item.y1<y:
                minor=name.strip()
                if meta!=None: meta['minor_y']=item.y1
            if major==None and kind=="major" and item.y1<y:
                major=name.strip()
                fullname=full
                if meta!=None: meta['major_y']=item.y1
                break
        assert major!=None and major.strip()!=""
        if minor!=None:
            return major+" "+minor
        return fullname
    cury=0
    coordstrs=page.get_by_regex(ur".*\d{6}N \d{7}E.*")
    out=[]
    while True:
        found=False
        #print "Looking for coords, y= %d"%(cury,)
        for titem in coordstrs:
            #print "Considering coordstr: ",titem.y1
            if titem.y1<=cury: continue
            if titem.x1<40: 
                item=titem
                found=True
                break
        if not found: break
        cury=item.y1
        headmeta=dict()
        name=findheadingfor(item.y1,headmeta)
        areaspec=[]
        #print "Rect: ",0,cury,minx+35,100
        y1=cury
        lines=page.get_lines(page.get_partially_in_rect(0,cury,minx+25,100))
        for idx,line in enumerate(lines):
            if re.search(ur"FL \d+",line) or line.count("FT MSL"): 
                vertidx=idx
                break            
            #print "Line:",line.encode('utf8')
            if line.strip()=="":
                vertidx=idx
                break
            cury=max(cury,line.y2+0.5)                
            line=line.replace(u"–","-")
            if not (line.endswith("-") or line.endswith(" ")):
                line+=" "                
            areaspec.append(line)
        verts=[]
        
        for idx in xrange(vertidx,len(lines)):
            #print "Looking for alt:",lines[idx],"y2:",lines[idx].y2
            m=re.search(ur"(FL\s+\d+)",lines[idx].strip())
            if m:
                verts.append((m.groups()[0],lines[idx].y1))
            m=re.search(ur"(\d+ FT (?:MSL|GND|SFC))",lines[idx].strip())
            if m:
                verts.append((m.groups()[0],lines[idx].y1))
            if len(verts)>=2: break
        y2=verts[-1][1]
        freqs=[]
        for attempt in xrange(2):
            for freqcand in page.get_by_regex(ur".*\d{3}\.\d{1,3}.*"):
                #print "headmeta:",headmeta
                #print "attempt:",attempt
                #print "freqy1:",freqcand.y1
                if freqcand.x1<30: continue
                if attempt==0:
                    if freqcand.y1<y1: continue
                else:
                    if 'major_y' in headmeta:                    
                        if freqcand.y1<headmeta['major_y']: continue
                    else:
                        if freqcand.y1<y1: continue
                                
                    
                if freqcand.y1>y2: continue
                x,y=freqcand.x1,freqcand.y1
                freq,=re.match(ur".*(\d{3}\.\d{3}).*",freqcand.text).groups()
                if freq=="121.500": continue
                lines=page.get_lines(page.get_partially_in_rect(x-10,y-1,x-0.5,y+1.5))
                fname=None
                for line in reversed(lines):
                    g=re.match(ur".*\b(\w{3,}\s+(?:Approach|Tower)).*",line)
                    if g:                        
                        #print "freqname Matched:",line
                        fname,=g.groups()
                        fname=fname.strip()
                        break
                if not fname: raise Exception("Found no frequency name for freq: "+freq)
                freqs.append((fname,float(freq)))
            if len(freqs): break
        
        (ceiling,ceilingy),(floor,floory)=verts
        assert ceilingy<floory
        assert floory-ceilingy<5.0
        uprint("Analyzing area for %s"%(name,))
        assert "".join(areaspec).strip()!=""
        area=mapper.parse_coord_str("".join(areaspec),context='estonia')
        uprint("Done analyzing %s"%(name,))
        #print area
        if name.count("CTA") and name.count("TMA")==0:
            type_="CTA"
        else:
            type_="TMA"
            
        if re.match(ur"\s*TALLINN\s*TMA\s*1\s*",name):
            out.append(dict(
                name="TALLIN TMA 2",
                floor='1700 ft MSL',
                ceiling='3500 ft MSL',
                freqs=freqs,
                type='TMA',
                points=mapper.parse_coord_str("""                
                A circle with radius 20 NM centred on 592448N 0244957E
                """)))
Beispiel #22
0
         elev=elev,
         date=date,
         runways=rwy_constructor.get_rwys(thrs),
         pos=pos)
     if adcharturl:
         ad['adcharturl']=adcharturl
     if 'adcharts' in addummy:
         ad['adcharts']=addummy['adcharts']
         
     aip_text_documents.help_parse_doc(ad,url,
                     icao,"ev",title="General Information",category="general")
         
     ads.append(ad)            
     spaces.append(dict(
         name=ctrname,
         points=mapper.parse_coord_str(ctrarea),
         ceiling=ceiling,
         type=type_,
         floor=floor,
         freqs=freqs,
         date=date,
         url=url            
                   ))
 spilve=dict(
     icao="EVRS",
     name="Spilve",
     elev=5,
     date=datetime(2011,04,05),
     pos=mapper.parsecoord("565931N 240428E")
            )
            
def extract_airfields(filtericao=lambda x: True, purge=True):
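    # Build the Swedish airfield list from the ES_AD_1_1 aerodrome directory,
    # then for "big" airfields (those listed without coordinates) parse the
    # per-airport AD 2 documents: ARP, elevation, runway thresholds, ATS
    # frequencies and ATS airspace.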
    # print getxml("/AIP/AD/AD 1/ES_AD_1_1_en.pdf")
    ads = []
    p = Parser("/AIP/AD/AD 1/ES_AD_1_1_en.pdf")
    points = dict()
    startpage = None
    for pagenr in xrange(p.get_num_pages()):
        page = p.parse_page_to_items(pagenr)
        if page.count("Aerodrome directory"):
            startpage = pagenr
            break
    if startpage == None:
        raise Exception("Couldn't find aerodrome directory in file")
    # print "Startpage: %d"%(startpage,)
    # nochartf=open("nochart.txt","w")
    for pagenr in xrange(startpage, p.get_num_pages()):
        row_y = []
        page = p.parse_page_to_items(pagenr)
        allines = [x for x in (page.get_lines(page.get_partially_in_rect(0, 0, 15, 100))) if x.strip()]
        for item, next in zip(allines, allines[1:] + [""]):
            # print "item:",item

            m = re.match(ur"^\s*[A-ZÅÄÖ]{3,}(?:/.*)?\b.*", item)
            if m:
                # print "Candidate, next is:",next
                if re.match(r"^\s*[A-Z]{4}\b.*", next):
                    # print "Matched:",item
                    # print "y1:",item.y1
                    row_y.append(item.y1)
        for y1, y2 in zip(row_y, row_y[1:] + [100.0]):
            # print "Extacting from y-range: %f-%f"%(y1,y2)
            items = list(page.get_partially_in_rect(0, y1 - 0.25, 5.0, y2 + 0.25, ysort=True))
            if len(items) >= 2:
                # print "Extract items",items
                ad = dict(name=unicode(items[0].text).strip(), icao=unicode(items[1].text).strip())
                # print "Icao:",ad['icao']
                assert re.match(r"[A-Z]{4}", ad["icao"])
                if not filtericao(ad):
                    continue
                if len(items) >= 3:
                    # print "Coord?:",items[2].text
                    m = re.match(r".*(\d{6}N)\s*(\d{7}E).*", items[2].text)
                    if m:
                        lat, lon = m.groups()
                        ad["pos"] = parse_coords(lat, lon)
                        # print "Items3:",items[3:]
                        elev = re.findall(r"(\d{1,5})\s*ft", " ".join(t.text for t in items[3:]))
                        # print "Elev:",elev
                        assert len(elev) == 1
                        ad["elev"] = int(elev[0])

                ads.append(ad)

    big_ad = set()
    for ad in ads:
        if not ad.has_key("pos"):
            big_ad.add(ad["icao"])

    for ad in ads:
        icao = ad["icao"]
        if icao in big_ad:
            if icao in ["ESIB", "ESNY", "ESCM", "ESPE"]:
                continue

            try:
                p = Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_6_1_en.pdf" % (icao, icao))
            except:
                p = Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_6-1_en.pdf" % (icao, icao))

            ad["aipvacurl"] = p.get_url()
            for pagenr in xrange(p.get_num_pages()):
                page = p.parse_page_to_items(pagenr)

                """
                for altline in exitlines:
                    m=re.match(r"(\w+)\s+(\d+N)\s*(\d+E.*)",altline)
                    if not m: continue
                    name,lat,lon=m.groups()
                    try:
                        coord=parse_coords(lat,lon)
                    except Exception:
                        continue
                    points.append(dict(name=name,pos=coord))
                """

                for kind in xrange(2):
                    if kind == 0:
                        hits = page.get_by_regex(r"H[Oo][Ll][Dd][Ii][Nn][Gg]")
                        kind = "holding point"
                    if kind == 1:
                        hits = page.get_by_regex(r"[Ee]ntry.*[Ee]xit.*point")
                        kind = "entry/exit point"
                    if len(hits) == 0:
                        continue
                    for holdingheading in hits:

                        items = sorted(
                            page.get_partially_in_rect(
                                holdingheading.x1 + 2.0, holdingheading.y2 + 0.1, holdingheading.x1 + 0.5, 100
                            ),
                            key=lambda x: x.y1,
                        )
                        items = [x for x in items if not x.text.startswith(" ")]
                        # print "Holding items:",items
                        for idx, item in enumerate(items):
                            print "Holding item", item
                            y1 = item.y1
                            if idx == len(items) - 1:
                                y2 = 100
                            else:
                                y2 = items[idx + 1].y1
                            items2 = [
                                x
                                for x in page.get_partially_in_rect(item.x1 + 1, y1 + 0.3, item.x1 + 40, y2 - 0.1)
                                if x.x1 >= item.x1 - 0.25 and x.y1 >= y1 - 0.05 and x.y1 < y2 - 0.05
                            ]
                            s = (" ".join(page.get_lines(items2))).strip()
                            print "Holding lines:", repr(page.get_lines(items2))
                            # if s.startswith("ft Left/3"): #Special case for ESOK
                            #    s,=re.match("ft Left/3.*?([A-Z]{4,}.*)",s).groups()
                            # m=re.match("ft Left/\d+.*?([A-Z]{4,}.*)",s)
                            # if m:
                            #    s,=m.groups()

                            if s.startswith("LjUNG"):  # Really strange problem with ESCF
                                s = s[0] + "J" + s[2:]
                            if s.lower().startswith("holding"):
                                sl = s.split(" ", 1)
                                if len(sl) > 1:
                                    s = sl[1]
                            s = s.strip()
                            if kind == "entry/exit point" and s.startswith("HOLDING"):
                                continue  # reached HOLDING-part of VAC

                            # Check for other headings
                            # Fixup strange formatting of points in some holding items: (whitespace between coord and 'E')
                            s = re.sub(ur"(\d+)\s*(N)\s*(\d+)\s*(E)", lambda x: "".join(x.groups()), s)

                            m = re.match(r"([A-Z]{2,}).*?(\d+N)\s*(\d+E).*", s)
                            if not m:
                                m = re.match(r".*?(\d+N)\s*(\d+E).*", s)
                                if not m:
                                    continue
                                assert m
                                lat, lon = m.groups()
                                # skavsta
                                if icao == "ESKN":
                                    if s.startswith(u"Hold north of T"):
                                        name = "NORTH"
                                    elif s.startswith(u"Hold south of B"):
                                        name = "SOUTH"
                                    else:
                                        assert 0
                                # add more specials here
                                else:
                                    continue
                            else:
                                name, lat, lon = m.groups()
                            try:
                                coord = parse_coords(lat, lon)
                            except Exception:
                                print "Couldn't parse:", lat, lon
                                continue
                            # print name,lat,lon,mapper.format_lfv(*mapper.from_str(coord))

                            if name.count("REMARK") or len(name) <= 2:
                                print "Suspicious name: ", name
                                # sys.exit(1)
                                continue
                            points[icao + " " + name] = dict(name=icao + " " + name, icao=icao, pos=coord, kind=kind)

    # for point in points.items():
    #    print point

    # sys.exit(1)

    def fixhex11(s):
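        # Replace control characters (everything below 0x20 except TAB, LF
        # and CR) with spaces, so stray bytes in the PDF text cannot confuse
        # the parser; e.g. fixhex11("A\x00B\tC") == "A B\tC".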
        out = []
        for c in s:
            i = ord(c)
            if i >= 0x20:
                out.append(c)
                continue
            if i in [0x9, 0xA, 0xD]:
                out.append(c)
                continue
            out.append(" ")

        return "".join(out)

    for ad in ads:
        icao = ad["icao"]
        if icao in big_ad:
            # print "Parsing ",icao
            p = Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_en.pdf" % (icao, icao), loadhook=fixhex11)
            ad["aiptexturl"] = p.get_url()
            firstpage = p.parse_page_to_items(0)
            te = "\n".join(firstpage.get_all_lines())
            # print te
            coords = re.findall(r"ARP.*(\d{6}N)\s*(\d{7}E)", te)
            if len(coords) > 1:
                raise Exception(
                    "First page of airport info (%s) does not contain exactly ONE set of coordinates" % (icao,)
                )
            if len(coords) == 0:
                print "Couldn't find coords for ", icao
            # print "Coords:",coords
            ad["pos"] = parse_coords(*coords[0])

            elev = re.findall(r"Elevation.*?(\d{1,5})\s*ft", te, re.DOTALL)
            if len(elev) > 1:
                raise Exception(
                    "First page of airport info (%s) does not contain exactly ONE elevation in ft" % (icao,)
                )
            if len(elev) == 0:
                print "Couldn't find elev for ", icao
            ad["elev"] = int(elev[0])
            freqs = []
            found = False
            thrs = []
            # uprint("-------------------------------------")
            for pagenr in xrange(p.get_num_pages()):
                page = p.parse_page_to_items(pagenr)
                # uprint("Looking on page %d"%(pagenr,))
                if (
                    0
                ):  # opening hours are no longer stored in a separate document for any airports. No need to detect which any more (since none are).
                    for item in page.get_by_regex(".*OPERATIONAL HOURS.*"):
                        lines = page.get_lines(page.get_partially_in_rect(0, item.y2 + 0.1, 100, 100))
                        for line in lines:
                            things = ["ATS", "Fuelling", "Operating"]
                            if not line.count("AIP SUP"):
                                continue
                            for thing in things:
                                if line.count(thing):
                                    ad["aipsup"] = True

                for item in page.get_by_regex(".*\s*RUNWAY\s*PHYSICAL\s*CHARACTERISTICS\s*.*"):
                    # uprint("Physical char on page")
                    lines = page.get_lines(page.get_partially_in_rect(0, item.y2 + 0.1, 100, 100))
                    seen_end_rwy_text = False
                    for line, nextline in izip(lines, lines[1:] + [None]):
                        # uprint("MAtching: <%s>"%(line,))
                        if re.match(ur"AD\s+2.13", line):
                            break
                        if line.count("Slope of"):
                            break
                        if line.lower().count("end rwy:"):
                            seen_end_rwy_text = True
                        if line.lower().count("bgn rwy:"):
                            seen_end_rwy_text = True
                        m = re.match(ur".*(\d{6}\.\d+)[\s\(\)\*]*(N).*", line)
                        if not m:
                            continue
                        m2 = re.match(ur".*(\d{6,7}\.\d+)\s*[\s\(\)\*]*(E).*", nextline)
                        if not m2:
                            continue
                        latd, n = m.groups()
                        lond, e = m2.groups()
                        assert n == "N"
                        assert e == "E"
                        lat = latd + n
                        lon = lond + e
                        rwytxts = page.get_lines(page.get_partially_in_rect(0, line.y1 + 0.05, 12, nextline.y2 - 0.05))
                        uprint("Rwytxts:", rwytxts)
                        rwy = None
                        for rwytxt in rwytxts:
                            # uprint("lat,lon:%s,%s"%(lat,lon))
                            # uprint("rwytext:",rwytxt)
                            m = re.match(ur"\s*(\d{2}[LRCM]?)\b.*", rwytxt)
                            if m:
                                assert rwy == None
                                rwy = m.groups()[0]
                        if rwy == None and seen_end_rwy_text:
                            continue
                        print "Cur airport:", icao
                        already = False
                        assert rwy != None
                        seen_end_rwy_text = False
                        for thr in thrs:
                            if thr["thr"] == rwy:
                                raise Exception("Same runway twice on airfield:" + icao)
                        thrs.append(dict(pos=mapper.parse_coords(lat, lon), thr=rwy))
            assert len(thrs) >= 2
            for pagenr in xrange(0, p.get_num_pages()):
                page = p.parse_page_to_items(pagenr)

                matches = page.get_by_regex(r".*ATS\s+COMMUNICATION\s+FACILITIES.*")
                # print "Matches of ATS COMMUNICATION FACILITIES on page %d: %s"%(pagenr,matches)
                if len(matches) > 0:
                    commitem = matches[0]
                    curname = None

                    callsign = page.get_by_regex_in_rect(ur"Call\s*sign", 0, commitem.y1, 100, commitem.y2 + 8)[0]

                    for idx, item in enumerate(
                        page.get_lines(
                            page.get_partially_in_rect(callsign.x1 - 0.5, commitem.y1, 100, 100),
                            fudge=0.3,
                            order_fudge=15,
                        )
                    ):
                        if item.strip() == "":
                            curname = None
                        if re.match(".*RADIO\s+NAVIGATION\s+AND\s+LANDING\s+AIDS.*", item):
                            break
                        # print "Matching:",item
                        m = re.match(r"(.*?)\s*(\d{3}\.\d{1,3})\s*MHz.*", item)
                        # print "MHZ-match:",m
                        if not m:
                            continue
                        # print "MHZ-match:",m.groups()
                        who, sfreq = m.groups()
                        freq = float(sfreq)
                        if abs(freq - 121.5) < 1e-4:
                            if who.strip():
                                curname = who
                            continue  # Ignore emergency frequency, it is understood
                        if not who.strip():
                            if curname == None:
                                continue
                        else:
                            curname = who
                        freqs.append((curname.strip().rstrip("/"), freq))

            for pagenr in xrange(0, p.get_num_pages()):
                page = p.parse_page_to_items(pagenr)

                matches = page.get_by_regex(r".*ATS\s*AIRSPACE.*")
                # print "Matches of ATS_AIRSPACE on page %d: %s"%(pagenr,matches)
                if len(matches) > 0:
                    heading = matches[0]
                    desigitem, = page.get_by_regex("Designation and lateral limits")
                    vertitem, = page.get_by_regex("Vertical limits")
                    airspaceclass, = page.get_by_regex("Airspace classification")

                    lastname = None
                    subspacelines = dict()
                    subspacealts = dict()
                    for idx, item in enumerate(
                        page.get_lines(page.get_partially_in_rect(desigitem.x2 + 1, desigitem.y1, 100, vertitem.y1 - 1))
                    ):

                        if item.count("ATS airspace not established"):
                            assert idx == 0
                            break

                        if item.strip() == "":
                            continue
                        m = re.match(r"(.*?)(\d{6}N\s+.*)", item)
                        if m:
                            name, coords = m.groups()
                            name = name.strip()
                        else:
                            name = item.strip()
                            coords = None
                        if name:
                            lastname = name
                        if coords:
                            subspacelines.setdefault(lastname, []).append(coords)
                        assert lastname
                    lastname = None

                    # print "Spaces:",subspacelines
                    # print "ICAO",ad['icao']
                    # altlines=page.get_lines(page.get_partially_in_rect(vertitem.x2+1,vertitem.y1,100,airspaceclass.y1-0.2))

                    # print "Altlines:",altlines
                    subspacealts = dict()
                    subspacekeys = subspacelines.keys()

                    allaltlines = " ".join(
                        page.get_lines(
                            page.get_partially_in_rect(
                                vertitem.x1 + 0.5, vertitem.y1 + 0.5, 100, airspaceclass.y1 - 0.2
                            )
                        )
                    )
                    single_vertlim = False
                    totalts = list(mapper.parse_all_alts(allaltlines))
                    # print "totalts:",totalts
                    if len(totalts) == 2:
                        single_vertlim = True

                    for subspacename in subspacekeys:
                        ceil = None
                        floor = None
                        subnames = [subspacename]
                        if subspacename.split(" ")[-1].strip() in ["TIA", "TIZ", "CTR", "CTR/TIZ"]:
                            subnames.append(subspacename.split(" ")[-1].strip())
                        # print "Parsing alts for ",subspacename,subnames
                        try:
                            for nametry in subnames:
                                if (
                                    single_vertlim
                                ):  # there's only one subspace, parse all of vertical limits field for this single one.
                                    items = [vertitem]
                                else:
                                    items = page.get_by_regex_in_rect(
                                        nametry, vertitem.x2 + 1, vertitem.y1, 100, airspaceclass.y1 - 0.2
                                    )
                                for item in items:
                                    alts = []
                                    for line in page.get_lines(
                                        page.get_partially_in_rect(
                                            item.x1 + 0.5, item.y1 + 0.5, 100, airspaceclass.y1 - 0.2
                                        )
                                    ):
                                        # print "Parsing:",line
                                        line = line.replace(nametry, "").lower().strip()
                                        parsed = list(mapper.parse_all_alts(line))
                                        if len(parsed):
                                            alts.append(mapper.altformat(*parsed[0]))
                                        if len(alts) == 2:
                                            break
                                    if alts:
                                        # print "alts:",alts
                                        ceil, floor = alts
                                        raise StopIteration
                        except StopIteration:
                            pass
                        assert ceil and floor
                        subspacealts[subspacename] = dict(ceil=ceil, floor=floor)

                    spaces = []
                    for spacename in subspacelines.keys():
                        altspacename = spacename
                        # print "Altspacename: %s, subspacesalts: %s"%(altspacename,subspacealts)
                        space = dict(
                            name=spacename,
                            ceil=subspacealts[altspacename]["ceil"],
                            floor=subspacealts[altspacename]["floor"],
                            points=parse_coord_str(" ".join(subspacelines[spacename])),
                            freqs=list(set(freqs)),
                        )

                        if True:
                            vs = []
                            for p in space["points"]:
                                x, y = mapper.latlon2merc(mapper.from_str(p), 13)
                                vs.append(Vertex(int(x), int(y)))
                            p = Polygon(vvector(vs))
                            if p.calc_area() <= 30 * 30:
                                pass  # print space
                                pass  # print "Area:",p.calc_area()
                            assert p.calc_area() > 30 * 30
                            # print "Area: %f"%(p.calc_area(),)

                        spaces.append(space)
                        # print space
                    ad["spaces"] = spaces
                    found = True
                if found:
                    break
            assert found
            ad["runways"] = rwy_constructor.get_rwys(thrs)
                if fl: return fl
                if alt: return alt+"FT MSL"
                if gnd: return "GND"
                if unl: return "UNL"
            ceiling,floor=[fixupalt(h) for h in [h1,h2]]
            if mapper.parse_elev(floor)>=9500:
                continue
            kind,name=re.match("EP (TSA|TRA|TFR) ([\d\w]+)",tra.text).groups()            
            def fix_coords(s):
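                # Normalize DMS coordinates written as degree/minute/second
                # into the compact form the coordinate parser expects, e.g.
                # fix_coords(u"52°08'15''N 020°55'10''E") -> "520815N 0205510E - "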
                
                def fixer(m):
                    a,b,c,d, e,f,g,h=m.groups()
                    return "%02d%02d%02d%s %03d%02d%02d%s - "%(int(a),int(b),int(c),d,
                                                               int(e),int(f),int(g),h)
                return re.sub(ur"(\d{2,3})°(\d{2})'(\d{2})''([NS])\s*(\d{2,3})°(\d{2})'(\d{2})''([EW])",fixer,s)
            coordstr2=fix_coords("".join(o)).rstrip().rstrip("-")
            print "COordstr:",coordstr2
            spaces.append(dict(
                name="EP %s %s"%(kind,name),
                points=mapper.parse_coord_str(coordstr2,context="poland"),
                ceiling=ceiling,
                floor=floor,
                type="TSA",
                freqs=[]
                    ))
    return spaces
if __name__=='__main__':
    for space in ep_parse_tra():
        print "space",space
        
    
def ee_parse_airfields2():
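    # Scrape all Estonian airfields from the eAIP AD 0.6 overview (heliports
    # are skipped), then parse each ICAO's AD 2 page: name, ARP position,
    # CTR/TIZ/FIZ airspace and aerodrome/landing/parking/VAC chart links.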
    ads=[]
    spaces=[]
    airac_date=get_airac_date()
    print "airac",airac_date
    overview_url="/%s/html/eAIP/EE-AD-0.6-en-GB.html"%(airac_date,)
        
    parser=lxml.html.HTMLParser()
    data,date=fetchdata.getdata(overview_url,country='ee')
    parser.feed(data)
    tree=parser.close()
    icaos=[]
    for cand in tree.xpath(".//h3"):
        txts=alltexts(cand.xpath(".//a"))
        aps=re.findall(r"EE[A-Z]{2}"," ".join(txts))
        if aps:
            icao,=aps
            if alltext(cand).count("HELIPORT"):
                print "Ignore heliport",icao
                continue
            icaos.append(icao)
    
    for icao in icaos:
        ad=dict(icao=icao)
        url="/%s/html/eAIP/EE-AD-2.%s-en-GB.html"%(airac_date,icao)
        data,date=fetchdata.getdata(url,country='ee')
        parser.feed(data)
        tree=parser.close()
        thrs=[]


        
        for h3 in tree.xpath(".//h3"):
            txt=alltext(h3)
            print repr(txt)
            ptrn=ur"\s*%s\s+[—-]\s+(.*)"%(unicode(icao.upper()),)
            m=re.match(ptrn,txt,re.UNICODE)
            if m:
                assert not 'name' in ad
                ad['name']=m.groups()[0]
                
        for tr in tree.xpath(".//tr"):
            txt=alltext(tr)
            m=re.match(r".*coordinates\s*and\s*site.*(\d{6}N\s*\d{7}E).*",txt)
            #print "Matching,",txt,":",m 
            if m:
                crds,=m.groups()
                ad['pos']=mapper.anyparse(crds)
                
        space=dict()
        for table in tree.xpath(".//table"):
            for tr in table.xpath(".//tr"):
                trtxt=alltext(tr)
                if trtxt.count("Designation and lateral limits"):
                    space=dict()
                    coords=tr.getchildren()[2]
                    lines=alltext(coords).split("\n")
                    if lines[0].strip()=='NIL':
                        continue
                    
                    
                    zname,what,spill=re.match(ur"(.*)\s+(CTR|TIZ|FIZ)(.*)",lines[0]).groups()
                    if spill and spill.strip():
                        rest=[spill]+lines[1:]
                    else:
                        rest=lines[1:]
                    what=what.strip()
                    assert ad['name'].upper().strip().count(zname.upper().strip())
                    assert what in ['FIZ','TIZ','CTR']
                    space['type']=what
                    space['points']=mapper.parse_coord_str("\n".join(rest))

                    space['name']=zname+" "+what
                    space['date']=date
                    space['url']=fetchdata.getrawurl(url,'ee')
                 
                    
                if trtxt.count("Vertical limits"):
                    vlim=alltext(tr.getchildren()[2])
                    if vlim.strip()=='NIL': continue
                    space['floor'],space['ceiling']=vlim.split(" to ")
                    
                #space['freqs']=x
                
        #hlc=False
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.lower().count("charts"):
                par=h4.getparent()
                for table in par.xpath(".//table"):
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        name,page=\
                            tr.getchildren()
                        nametxt=alltext(name)
                        print "nametxt:",nametxt,"link:"
                        for reg,variant in [
                                           (r"Aerodrome.*Chart.*","") ,
                                           (r"Landing.*Chart.*","landing"), 
                                           (r".*Parking.*Chart.*","parking"), 
                                           (r".*Visual.*Approach.*|.*\bVAC\b.*","vac")
                                            ]:
                            if re.match(reg,nametxt):
                                for a in page.xpath(".//a"):
                                    print "linklabel",a.text
                                    print "attrib:",a.attrib
                                    href=a.attrib['href']
                                    print "Bef repl",href
                                    if href.lower().endswith("pdf"):
                                        href=href.replace("../../graphics","/%s/graphics"%(airac_date,))
                                        print "href:",href,airac_date
                                        assert href
                                        parse_landing_chart.help_plc(ad,href,
                                                        icao,ad['pos'],"ee",variant=variant)
                                        """arp=ad['pos']
                                        lc=parse_landing_chart.parse_landing_chart(
                                                href,
                                                icao=icao,
                                                arppos=arp,country="ee")
                                        assert lc
                                        if lc:
                                            ad['adcharturl']=lc['url']
                                            ad['adchart']=lc
                                            hlc=True
                                            #chartblobnames.append(lc['blobname'])
                                        """                                                    
        #assert hlc
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.count("RUNWAY PHYSICAL"):
                par=h4.getparent()

                for table in par.xpath(".//table"):
                    prevnametxt=""
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        if idx==0:
                            fc=alltext(tr.getchildren()[0])
                            print "FC",fc
                            if not fc.count("Designations"):
                                break #skip table
                        if idx<2:continue
                        if len(tr.getchildren())==1:continue
                        print "c:",tr.getchildren(),alltexts(tr.getchildren())
                        desig,trubrg,dims,strength,thrcoord,threlev=tr.getchildren()
                        rwy=re.match(r"(\d{2}[LRC]?)",alltext(desig))
                        altc=alltext(thrcoord)
                        print "Matching",altc
                        print "rwymatch:",alltext(desig)
                        m=re.match(r"\s*(\d+\.?\d*N)[\s\n]*(\d+\.?\d*E).*",altc,re.DOTALL|re.MULTILINE)                        
                        if m:
                            lat,lon=m.groups()
                            print "Got latlon",lat,lon
                            thrs.append(dict(pos=mapper.parse_coords(lat,lon),thr=rwy.groups()[0]))         
                        
                                
        space['freqs']=[]
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.count("ATS COMMUNICATION"):
                par=h4.getparent()
                for table in par.xpath(".//table"):
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        print "cs",repr(tr.getchildren()),alltexts(tr.getchildren())
                        print len(tr.getchildren())
                        if len(tr.getchildren())!=5:
                            if "".join(alltexts(tr.getchildren())).count(u"EMERG"):
                                continue #Sometimes emergency freq is listed, and then it is without callsign
                        service,callsign,frequency,hours,remarks=\
                            tr.getchildren()
                        callsigntxt=alltext(callsign)
                        if idx<2:
                            if idx==0:
                                assert callsigntxt.strip()=="Call sign"
                            if idx==1:
                                 assert callsigntxt.strip()=="2"
                            continue
                        ftext=alltext(frequency)
                        print "matching freq",ftext
                        for freq in re.findall(ur"\b\d{3}\.\d{1,3}",ftext):
                            freqmhz=float(freq)                            
                            space['freqs'].append((callsigntxt.strip(),freqmhz))
                              
        if space and 'points' in space:
            assert 'freqs' in space
            assert 'points' in space
            assert 'floor' in space
            assert 'ceiling' in space
            assert 'type' in space
            spaces.append(space)
        if thrs:
            ad['runways']=rwy_constructor.get_rwys(thrs)
            
        aip_text_documents.help_parse_doc(ad,url,
                        icao,"ee",title="General Information",category="general")
            
        ad['date']=date
        ad['url']=fetchdata.getrawurl(url,'ee')   
        print "AD:",ad
        assert 'pos' in ad
        assert 'name' in ad
        ads.append(ad)
Example #26
            heights = ['GND', 'FL95']
        if len(heights) == 1 and d['name'] == u'Göteborg TMA':
            heights = ['4500', 'FL95']
        assert len(heights) == 2
        ceiling = heights[0].strip()
        floor = heights[1].strip()

        pa['name'] = d['name']
        pa['floor'] = floor
        pa['ceiling'] = ceiling
        if mapper.parse_elev(floor) >= 9500:
            continue
        #uprint("Arealines:\n================\n%s\n============\n"%(arealines[:last_coord_idx]))
        #print pa
        areacoords = " ".join(arealines[:last_coord_idx])
        pa['points'] = parse_coord_str(areacoords)

        vs = []
        for p in pa['points']:
            #print "from_str:",repr(p)
            x, y = mapper.latlon2merc(mapper.from_str(p), 13)
            vs.append(Vertex(int(x), int(y)))

        p = Polygon(vvector(vs))
        if p.calc_area() <= 30 * 30:
            pass  #print pa
            #print "Area:",p.calc_area()
        assert p.calc_area() > 30 * 30
        #print "Area: %f"%(p.calc_area(),)
        #print "Point-counts:",len(pa['points'])
        for p in pa['points']:
Example #27
def ee_parse_gen_r2(url):
    spaces=[]
    parser=lxml.html.HTMLParser()
    data,date=fetchdata.getdata(url,country='ee')
    parser.feed(data)
    tree=parser.close()
    print "Parsed tree"
    for tab in tree.xpath(".//table"):
        print "Found table"
        for idx,cand in enumerate(tab.xpath(".//tr")):
            if len(cand.getchildren())<3:
                continue
            space=dict()
            #print list(cand.getchildren())
            what,vert,remark=list(cand.getchildren())[0:3]         
            whattxt=alltext(what).replace(u"–","-").replace(u"\xa0"," ")
            
            verttxt=alltext(vert)
            
            while True:
                w=re.sub(ur"\(.*?\)","",whattxt)
                if w!=whattxt:
                    whattxt=w 
                    continue
                break
            
            #print idx,whattxt
            if idx<3:
                if idx==1: assert (whattxt.count("Identification") or whattxt.count("ateral limits"))
                if idx==2: assert whattxt.strip()=="1"
                continue 
            verttxt=verttxt.replace(u"\xa0",u" ")
            vertlines=[x for x in verttxt.split("\n") if x.strip()]
            if len(vertlines)==1:
                vertlines=[x for x in verttxt.split("  ") if x.strip()]
            print "Verlintes:",repr(vertlines)
            #print "wha------------------------ t",whattxt
            space['ceiling'],space['floor']=vertlines[:2]
            mapper.parse_elev(space['ceiling'])
            ifloor=mapper.parse_elev(space['floor'])
            if ifloor>=9500: continue
            lines=whattxt.split("\n")
            out=[]
            merged=""
            for line in lines[1:]:
                line=line.strip().replace(u"–","-")
                if line=="":continue
                if line.endswith("point"):
                    out.append(line+" ")
                    continue
                if line.endswith("ircle with radius of") or line.endswith(",") or line.endswith("on") or line.endswith("radius"):
                    merged=" ".join([merged,line])
                    print "<---Merged:",merged
                    continue
                if merged:
                    line=" ".join([merged,line])
                merged=""
                if not line.endswith("-"):
                    line=line+" -"
                out.append(line+"\n")
            
            space['name']=lines[0].strip()
            w="".join(out)
            print "Parsing:",w
            if space['name'].startswith('EER1 '):                
                w=ee_parse_tma2.eer1txt
                fir=mapper.parse_coord_str(ee_parse_tma2.firtxt,context='estonia')
                fir_context=[fir]
                space['points']=mapper.parse_coord_str(w,fir_context=fir_context)
            else:
                space['points']=mapper.parse_coord_str(w,context='estonia')
            space['type']='R'
            space['date']=date
            space['freqs']=[]
            space['url']=fetchdata.getrawurl(url,'ee')            
            spaces.append(space)
    return spaces
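# A minimal usage sketch for ee_parse_gen_r2, mirroring the __main__ pattern used for
# ep_parse_tra above. The URL here is hypothetical; substitute the eAIP restriction
# page for the current AIRAC cycle.
if __name__=='__main__':
    for space in ee_parse_gen_r2("/html/eAIP/EE-ENR-5.1-en-GB.html"):
        print "space",space['name'],space['floor'],space['ceiling']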
Example #28
def ev_parse_tma():
    out = []
    parser = lxml.html.HTMLParser()
    # url="/Latvia_EV-ENR-2.1-en-GB.html"
    cur_airac = get_cur_airac()
    url = "/eAIPfiles/%s-AIRAC/html/eAIP/EV-ENR-2.1-en-GB.html" % (cur_airac,)

    data, date = fetchdata.getdata(url, country="ev")
    parser.feed(data)
    tree = parser.close()

    got_fir = False
    for table in tree.xpath("//table"):
        # print "Table with %d children"%(len(table.getchildren()),)
        rows = list(table.xpath(".//tr"))
        for idx in xrange(5):
            headingrow = rows[idx]
            cols = list(headingrow.xpath(".//th"))
            # print len(cols)
            if len(cols) == 5:
                break
        else:
            raise Exception("No heading row")
        assert idx == 0
        # for idx,col in enumerate(cols):
        #    print "Col %d, %s"%(idx,alltext(col)[:10])
        nameh, unith, callsignh, freqh, remarkh = cols
        assert alltext(nameh).lower().count("name")
        assert alltext(unith).lower().count("unit")
        assert re.match(ur"call\s*sign", alltext(callsignh).lower())
        lastcols = None
        for row in rows[1:]:
            cols = list(row.xpath(".//td"))
            if len(cols) == 5:
                name, unit, callsign, freq, remark = cols
                lastcols = cols
            else:
                if lastcols:
                    unit, callsign, freq, remark = lastcols[1:]
                    name = cols[0]
                else:
                    continue

            lines = [x.strip() for x in alltext(name).split("\n") if x.strip()]
            if len(lines) == 0:
                continue
            spacename = lines[0].strip()

            if re.match(ur"RIGA\s*UTA|RIGA\s*CTA|RIGA\s*AOR.*", spacename):
                continue
            freqstr = alltext(freq)
            callsignstr = alltext(callsign)
            if freqstr.strip():
                print freqstr
                freqmhzs = re.findall(ur"\d{3}\.\d{3}", freqstr)
                assert len(freqmhzs) <= 2
                callsigns = [callsignstr.split("\n")[0].strip()]
                freqs = []
                for idx, freqmhz in enumerate(freqmhzs):
                    if freqmhz == "121.500":
                        continue
                    freqs.append((callsigns[idx], float(freqmhz)))
                print "freqs:", freqs
            else:
                freqs = []
            assert len(lines)

            classidx = next(idx for idx, x in reversed(list(enumerate(lines))) if x.lower().count("class of airspace"))

            if re.match(ur"RIGA\s*FIR.*UIR", spacename, re.UNICODE):
                got_fir = True
                lastspaceidx = classidx - 2
                floor = "GND"
                ceiling = "-"
                type_ = "FIR"
            else:
                if lines[classidx - 1].count("/") == 1:
                    floor, ceiling = lines[classidx - 1].split("/")
                    lastspaceidx = classidx - 1
                else:
                    floor = lines[classidx - 1]
                    ceiling = lines[classidx - 2]
                    lastspaceidx = classidx - 2
                ceiling = strangefix(ceiling)
                floor = strangefix(floor)

                mapper.parse_elev(ceiling)
                mapper.parse_elev(floor)
                type_ = "TMA"
            tcoords = lines[1:lastspaceidx]
            # verify that we got actual altitudes:
            coords = []
            for coord in tcoords:
                coord = coord.strip().replace("(counter-)", "").replace("(RIGA DVOR - RIA)", "")
                if coord.endswith(u"E") or coord.endswith("W"):
                    coord = coord + " -"
                coords.append(coord)

            raw = " ".join(coords)
            raw = re.sub(s(ur"Area bounded by lines successively joining the following points:"), "", raw)
            print "Raw:", raw
            coords = mapper.parse_coord_str(raw, context="latvia")
            for cleaned in clean_up_polygon(coords):
                out.append(
                    dict(
                        name=spacename,
                        points=cleaned,
                        type=type_,
                        freqs=freqs,
                        floor=floor,
                        url=url,
                        date=date,
                        ceiling=ceiling,
                    )
                )
                if type_ == "FIR":
                    out[-1]["icao"] = "EVRR"
            else:
                raise Exception("No limitstr")

            cstr = []
            spacename = coordstr[0]
            assert spacename == "CTR"
            for sub in coordstr[1:]:
                cstr.append(sub.strip().rstrip("."))

            def fixfunc(m):
                return "".join(m.groups())

            raw = re.sub(ur"(\d{2,3})\s*(\d{2})\s*(\d{2})\s*([NSEW])", fixfunc,
                         "".join(cstr)).replace(",", " - ")
            print "parsing raw:", raw
            points = mapper.parse_coord_str(raw, context='lithuania')

            print "Limitstr", limitstr
            floor, ceiling = re.match(ur"(.*)\s*to\s*(.*)", limitstr).groups()
            mapper.parse_elev(floor)
            mapper.parse_elev(ceiling)

            spacenamestem = spacename.strip()
            if spacenamestem.endswith("CTR"):
                spacenamestem = spacenamestem[:-3].strip()
            if spacenamestem.endswith("FIZ"):
                spacenamestem = spacenamestem[:-3].strip()
            #construct names
            newfreqs = []
            for serv, freq in freqs:
                serv = serv.strip()
Example #30
def ee_parse_tma2():
    spaces=[]
    airac_date=get_airac_date()    
    url="/%s/html/eAIP/EE-ENR-2.1-en-GB.html"%(airac_date,)
    parser=lxml.html.HTMLParser()
    data,date=fetchdata.getdata(url,country='ee')
    parser.feed(data)
    tree=parser.close()
    icaos=[]
    def nested(tab):
        if tab==None: return False
        if tab.getparent() is None:
            return False
        #print dir(tab)
        if tab.tag=='table':
            return True
        return nested(tab.getparent())
    for tab in tree.xpath(".//table"):
        print "table alltext:",alltext(tab)
        if nested(tab.getparent()): continue
        firsttr=tab.xpath(".//tr")[0]
        ntext=alltext(firsttr)
        print "firsttr",firsttr
        print "ntext",ntext
        if re.match(ur".*FIR\s*/\s*CTA.*",ntext):
            print "Matches Tallin FIR"
            name='TALLIN FIR'
            points=mapper.parse_coord_str(firtxt,context='estonia')
            floor,ceiling="GND","FL195"
            space={}
            space['name']=name
            space['points']=points
            space['floor']=floor
            space['ceiling']=ceiling
            space['freqs']=[]
            space['icao']='EETT'
            space['type']='FIR'
            space['date']=date
            space['url']=fetchdata.getrawurl(url,'ee')
            spaces.append(space)            
            continue
        else:
            name=ntext.strip()
        space=dict(name=name)
        print "Name",name
        assert space['name'].count("TMA") \
            or space['name'].count("FIR")
        if space['name'].count("FIR"):
            type='FIR'            
        else:
            type="TMA"
        freqs=[]
        points=None
        floor=None
        ceiling=None
        for cand in tab.xpath(".//tr"):
            if len(cand.getchildren())!=2:
                continue
            nom,what=cand.getchildren()            
            whattxt=alltext(what)
            nomtxt=alltext(nom)
            print "nomtxt",nomtxt,"space name",space['name']
            if nomtxt.count("Lateral limits"):
                if space['name'].count("TALLINN TMA 2"):
                    points=mapper.parse_coord_str("""                
                        A circle with radius 20 NM centred on 592448N 0244957E
                        """)
                else:               
                    whattxt=whattxt.replace(
                        "then along the territory dividing line between Estonia and Russia to",
                        "- Along the common Estonian/X state boundary to " 
                        )
                    print "Fixed up",whattxt
                    points=mapper.parse_coord_str(whattxt,context='estonia')
            if nomtxt.count("Vertical limits"):
                floor,ceiling=whattxt.split(" to ")
            if nomtxt.count("Call sign"):
                callsign=whattxt.split("\n")[0]
            if nomtxt.count("freq"):
                freqs.extend(re.findall(ur"\d+\.\d+\s*MHz"))
                
        assert points and floor and ceiling
        space['points']=points
        space['type']=type
        space['floor']=floor
        space['ceiling']=ceiling
        space['freqs']=[]
        space['date']=date
        space['url']=fetchdata.getrawurl(url,'ee')
        for freq in freqs:
            space['freqs'].append((callsign,freq))
        spaces.append(space)
                    lat, lon = m.groups()
                    return lat.replace(" ", "") + " " + lon.replace(" ", "")

                areaspec = re.sub(ur"([\d ]+N)\s*([\d ]+E)", fixup, areaspec)

                areaspec = re.sub(
                    ur"\(.*/\s*equal\s*to\s*Malmi\s*CTR\s*lateral\s*limits\)",
                    "", areaspec)
                #print "Fixed areaspec",areaspec
                #if icao=="EFKS":
                #    areaspec=areaspec.replace("6615 28N","661528N")
                #Error! REstriction areas!
                spaces.append(
                    dict(name=spacename,
                         type="CTR",
                         points=mapper.parse_coord_str(areaspec)))
                if line.count("Vertical limits"):
                    #print "Breaking"
                    break
            while not line.count("Vertical limits"):
                line = lines.next()
            #print "Matching veritcal limits--------------------------------"
            oldspaces = spaces
            spaces = []
            for space in oldspaces:
                if space['name'].count("/"):
                    a, b = space['name'].split("/")
                    spaces.append(dict(space, name=a.strip()))
                    spaces.append(dict(space, name=b.strip()))
                else:
                    spaces.append(space)
Example #32
from datetime import datetime
import fplan.lib.mapper as mapper
import re
from fplan.lib.poly_cleaner import clean_up_polygon

def ey_parse_tma():
    out=[]
    
    def emit(name,coordstr,limits,type="TMA",freqs=[],date=datetime(2011,03,25),icao=None):
        ceiling,floor=limits.split("/")
        def compact(m):
            return "".join(m.groups())
        coordstr=re.sub(ur"(\d{2,3})\s*(\d{2})\s*(\d{2})",compact,coordstr)
        coordstr=re.sub(ur"NM from KNA to ","NM from 545740N 0240519E to",coordstr)
        print coordstr
        tpoints=mapper.parse_coord_str(coordstr,context='lithuania')
        f1=mapper.parse_elev(floor)
        c1=mapper.parse_elev(ceiling)
        if c1!='-':
            assert c1>f1
        for points in clean_up_polygon(tpoints):
            out.append(
                dict(
                     name=name,
                     floor=floor,
                     ceiling=ceiling,
                     freqs=freqs,
                     points=points,
                     type=type
                     )
            )
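    # Hedged illustration of how emit is presumably invoked in the full source
    # (the name, coordinates and limits below are made up, not taken from the AIP):
    #
    #   emit("VILNIUS TMA",
    #        u"543800N 0251000E - 544500N 0252500E - 543000N 0253500E - 543800N 0251000E",
    #        "FL 95/1700FT AMSL",
    #        freqs=[("VILNIUS APPROACH",118.205)])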
Example #33
            for line in reversed(lines):
                if re.match(ur"[A-ZÅÄÖ ]{3,}",line):
                    #print "freqname Matched:",line
                    fname=line.strip()
                    break
            if not fname: raise Exception("Found no frequency name for freq: "+freq)
            freqs.append((fname,float(freq)))
        if len(freqs): break

    (ceiling,ceilingy),(floor,floory)=verts
    assert ceilingy<floory
    assert floory-ceilingy<5.0
    #uprint("Analyzing area for %s"%(name,))
    assert "".join(areaspec).strip()!=""
    print areaspec
    area=mapper.parse_coord_str("".join(areaspec))
    #uprint("Done analyzing %s"%(name,))
    #print area
    if name.count("CTA") and name.count("TMA")==0:
        type_="CTA"
    else:
        type_="TMA"

    out.append(dict(
        floor=floor,
        ceiling=ceiling,
        freqs=freqs,
        type=type_,
        name=name,
        points=area))
Example #34
def ev_parse_x(url):
    out = []
    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(url, country="ev")
    parser.feed(data)
    tree = parser.close()
    got_fir = False
    for table in tree.xpath("//table"):
        #print "Table with %d children"%(len(table.getchildren()),)
        rows = list(table.xpath(".//tr"))

        #for idx,col in enumerate(cols):
        #    print "Col %d, %s"%(idx,alltext(col)[:10])
        headingcols = rows[0].xpath(".//th")
        if len(headingcols) == 0: continue
        name, alt = headingcols[0:2]
        if alltext(name).count("QNH") and len(headingcols) > 6:
            continue
        print alltext(name)
        assert alltext(name).lower().count("name") or alltext(
            name).lower().count("lateral")
        print alltext(alt)
        assert alltext(alt).lower().count("limit")

        for row in rows[1:]:
            cols = list(row.xpath(".//td"))
            if len(cols) < 2: continue
            name, alt = cols[:2]
            lines = [x.strip() for x in alltext(name).split("\n") if x.strip()]
            if len(lines) == 0: continue
            assert len(lines)

            spacename = lines[0].strip()
            if spacename.strip() == "A circle radius 0,5 NM centered on 565705N 0240619E EVR2 RIGA":
                spacename = "EVR2 RIGA"
                lines = [spacename, lines[0][:-len(spacename)].strip()] + lines[1:]
            print spacename
            if spacename.strip() == "SKRIVERI":
                continue
            print "Spacename is:", spacename
            assert spacename[:3] in ["EVR","EVP","TSA","TRA"] or \
                spacename.endswith("ATZ") or \
                spacename.endswith("ATZ (MILITARY)")

            altcand = []
            for altc in alltext(alt).split("\n"):
                if altc.count("Real-time"): continue
                altcand.append(altc.strip())
            print "Altcands:", altcand
            ceiling, floor = [x.strip() for x in " ".join(altcand).split("/")]
            ceiling = strangefix(ceiling)
            floor = strangefix(floor)

            mapper.parse_elev(ceiling)
            ifloor = mapper.parse_elev(floor)
            iceiling = mapper.parse_elev(ceiling)
            if ifloor >= 9500 and iceiling >= 9500:
                continue
            assert ifloor < iceiling

            freqs = []
            raw = " ".join(lines[1:])
            raw = re.sub(
                s(ur"Area bounded by lines successively joining the following points:"
                  ), "", raw)
            print "Raw:", raw

            coords = mapper.parse_coord_str(raw, context='latvia')
            for cleaned in clean_up_polygon(coords):
                out.append(
                    dict(name=spacename,
                         points=cleaned,
                         type="R",
                         freqs=freqs,
                         floor=floor,
                         url=url,
                         date=date,
                         ceiling=ceiling))

    return out
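# Sketch only: ev_parse_x is presumably pointed at the Latvian eAIP ENR 5.x pages;
# the URL below is illustrative, not taken from the source.
if __name__=='__main__':
    for space in ev_parse_x("/eAIPfiles/%s-AIRAC/html/eAIP/EV-ENR-5.1-en-GB.html"%(get_cur_airac(),)):
        print "space",space['name'],space['floor'],space['ceiling']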
Example #35
def fi_parse_tma():
    p=parse.Parser(r"/ais/eaip/pdf/enr/EF_ENR_2_1_EN.pdf",fixuphref,country='fi')
	
    res=[]    
    atsres=[]
    for pagenr in xrange(4,p.get_num_pages()): 
        parsed,atsparsed=parse_page(p,pagenr)#pagenr)
        res.extend(parsed)
        atsres.extend(atsparsed)        
        #break
        
    
    print "Len ouf out ",len(res)
    atsout=[]    
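    # Cut each ATS sector by every TMA polygon: whatever remains (possibly several
    # disjoint pieces) is converted back from zoom-13 mercator to lat/lon strings
    # and emitted as separate spaces carrying the original sector's metadata.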
    for space in atsres:
        #print "bef cut:",space['points']
        mypolys=[makepoly.poly(space['points'])]    
        for tmaitem in res:
            if tmaitem['type']!='TMA': continue
            outmypolys=[]
            assert len(mypolys)>=1
            for mypoly in list(mypolys):
                tmapoly=makepoly.poly(tmaitem['points'])
                #print mypoly
                #print tmapoly
                shape=mypoly.subtract(tmapoly)
                newpolys=shape.get_polys()
                if len(newpolys)>1:
                    print "Length is:", len(newpolys)
                #print "Cutting"
                outmypolys.extend([shapemerge2d.Polygon(x) for x in list(newpolys)])
                #assert len(newpolys)==1
            if len(outmypolys)>1:
                print "outmypolys:",outmypolys
                #print "Cut to:",mypoly
            mypolys=outmypolys
            
        for mypoly in mypolys:
            t=[]
            for mx,my in [(v.get_x(),v.get_y()) for v in  mypoly.get_vertices()]:
                t.append(mapper.to_str(mapper.merc2latlon((mx,my),13)))
            #print "Aft cut:",t
            newspace=dict(space)
            newspace['points']=t            
            atsout.append(newspace)
        if len(mypolys)>1:    
            print "Space was split into ",len(mypolys),"parts"
    res.extend(atsout)
        
    res.append(dict(
        name="FINLAND FIR",
        icao="EFIN",
        floor='GND',
        ceiling='-',
        freqs=[],
        type='FIR',
        date=datetime(2011,4,9),
        points=mapper.parse_coord_str("""                                   
    601130N 0190512E - 601803N 0190756E -
610000N 0191905E - 614000N 0193000E -
631000N 0201000E - 632830N 0204000E -
633700N 0213000E - 644100N 0225500E -
653148N 0240824E -
Along the common X/Y state boundary to 690336N 0203255E -
Along the common X/Y state boundary to 690307N 0285545E -
Along the common X/Y state boundary to 601201N 0271735E - 
600800N 0263300E -
595830N 0260642E - 595300N 0255200E -
595430N 0252000E - 595300N 0245100E -
590000N 0210000E - 591524N 0203239E -
593346N 0195859E - 601130N 0190512E
""",context="finland")))
        
        
        
    #for pa in res:
    #    pretty(pa)
        
        
    return res
def find_areas(page):
    areastarts=sorted(
        list(page.get_by_regex(r".*?\d{4,6}[NS].*"))+
        list(page.get_by_regex(r".*?\d{5,7}[EW].*"))
        ,
        key=lambda x:(x.y1,x.x1))
    #for area in areastarts:
    #    print "Area font:",area.fontsize,area.font,"bolditalic:",area.bold,area.italic
    #    print " - Area:",area.text
        
    print "Found %d area-lines on page"%(len(areastarts),)
    print areastarts
    if len(areastarts)==0: return
    idx=0
    cury=None
    while True:
        firstdiff=None
        process=[]
        miny=None
        maxy=None
        while idx<len(areastarts):
            process.append(areastarts[idx])            
            cury=areastarts[idx].y1

            if miny==None or maxy==None:
                miny=cury
                maxy=cury
            miny=min(areastarts[idx].y1,miny)
            maxy=max(areastarts[idx].y2,maxy)
            
            
            #print "Diff:",diff,"firstdiff:",firstdiff,"delta:",diff-firstdiff if diff!=None and firstdiff!=None else ''
            idx+=1
            if idx<len(areastarts):
                diff=areastarts[idx].y1-cury
                if diff!=0:
                    if firstdiff==None: firstdiff=diff
                #print "Diff:",diff
                if diff>6.0: 
                    #print "Diff too big"
                    break
                if firstdiff and diff>1.35*firstdiff: 
                    #print "bad spacing",diff,1.5*firstdiff
                    break
        #print "Determined that these belong to one area:",process
        if len(process):
            alltext="\n".join(page.get_lines(process))
            print "<%s>"%(alltext,)
            anyarea=re.findall(r"((?:\d{4,6}[NS]\s*\d{5,7}[EW])+)",alltext,re.DOTALL|re.MULTILINE)
            print "Matching:"
            print anyarea
            if not len(anyarea): continue
            if len(anyarea)>=3:
                coords=parse_coord_str(" - ".join(anyarea),filter_repeats=True)
                print "AREA:"
                print coords
                print "===================================="
                assert len(coords)>=3
                coordfontsize=process[0].fontsize
                areaname=None
                for item in reversed(sorted(page.get_partially_in_rect(0,0,100,process[0].y1),key=lambda x:(x.y1,x.x1))):
                    if item.text.strip()=="": continue
                    #print "fontsize",item.fontsize,item.text,"y1:",item.y1
                    if item.fontsize>process[0].fontsize or item.bold>process[0].bold or item.italic>process[0].italic:
                        assert item.y1!=None
                        miny=min(item.y1,miny)
                        print "Found name: <%s>. Fonts: %d, %d, Fontsize: %s, old fontsize: %s"%(item.text,item.font,process[0].font,item.fontsize,process[0].fontsize)
                        prevx1=item.x1
                        revname=[]
                        for nameitem in reversed(sorted(page.get_partially_in_rect(0,item.y1+0.01,item.x2,item.y2-0.01),key=lambda x:(x.x1))):
                            if prevx1-nameitem.x2>3.0:
                                break
                            revname.append(nameitem.text.strip())                                
                        areaname=" ".join(reversed(revname))
                        break       
                yield (areaname,coords,dict(y1=miny,y2=maxy))
        if idx>=len(areastarts): break            
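# Minimal sketch of driving find_areas, reusing the parse.Parser pipeline from
# fi_parse_tma above (same PDF path, fixup hook and page range as that function):
if __name__=='__main__':
    _p=parse.Parser(r"/ais/eaip/pdf/enr/EF_ENR_2_1_EN.pdf",fixuphref,country='fi')
    for _pagenr in xrange(4,_p.get_num_pages()):
        _page=_p.parse_page_to_items(_pagenr)
        for _name,_coords,_bounds in find_areas(_page):
            print "area",_name,"points:",len(_coords)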
def ee_parse_airfields2():
    ads = []
    spaces = []
    airac_date = get_airac_date()
    print "airac", airac_date
    overview_url = "/%s/html/eAIP/EE-AD-0.6-en-GB.html" % (airac_date, )

    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(overview_url, country='ee')
    parser.feed(data)
    tree = parser.close()
    icaos = []
    for cand in tree.xpath(".//h3"):
        txts = alltexts(cand.xpath(".//a"))
        aps = re.findall(r"EE[A-Z]{2}", " ".join(txts))
        if aps:
            icao, = aps
            if alltext(cand).count("HELIPORT"):
                print "Ignore heliport", icao
                continue
            icaos.append(icao)

    for icao in icaos:
        ad = dict(icao=icao)
        url = "/%s/html/eAIP/EE-AD-2.%s-en-GB.html" % (airac_date, icao)
        data, date = fetchdata.getdata(url, country='ee')
        parser.feed(data)
        tree = parser.close()
        thrs = []

        for h3 in tree.xpath(".//h3"):
            txt = alltext(h3)
            print repr(txt)
            ptrn = ur"\s*%s\s+[—-]\s+(.*)" % (unicode(icao.upper()), )
            m = re.match(ptrn, txt, re.UNICODE)
            if m:
                assert not 'name' in ad
                ad['name'] = m.groups()[0]

        for tr in tree.xpath(".//tr"):
            txt = alltext(tr)
            m = re.match(r".*coordinates\s*and\s*site.*(\d{6}N\s*\d{7}E).*",
                         txt)
            #print "Matching,",txt,":",m
            if m:
                crds, = m.groups()
                ad['pos'] = mapper.anyparse(crds)

        space = dict()
        for table in tree.xpath(".//table"):
            for tr in table.xpath(".//tr"):
                trtxt = alltext(tr)
                if trtxt.count("Designation and lateral limits"):
                    space = dict()
                    coords = tr.getchildren()[2]
                    lines = alltext(coords).split("\n")
                    if lines[0].strip() == 'NIL':
                        continue

                    zname, what, spill = re.match(ur"(.*)\s+(CTR|TIZ|FIZ)(.*)",
                                                  lines[0]).groups()
                    if spill and spill.strip():
                        rest = [spill] + lines[1:]
                    else:
                        rest = lines[1:]
                    what = what.strip()
                    assert ad['name'].upper().strip().count(
                        zname.upper().strip())
                    assert what in ['FIZ', 'TIZ', 'CTR']
                    space['type'] = what
                    space['points'] = mapper.parse_coord_str("\n".join(rest))

                    space['name'] = zname + " " + what
                    space['date'] = date
                    space['url'] = fetchdata.getrawurl(url, 'ee')

                if trtxt.count("Vertical limits"):
                    vlim = alltext(tr.getchildren()[2])
                    if vlim.strip() == 'NIL': continue
                    space['floor'], space['ceiling'] = vlim.split(" to ")

                #space['freqs']=x

        #hlc=False
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.lower().count("charts"):
                par = h4.getparent()
                for table in par.xpath(".//table"):
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        name,page=\
                            tr.getchildren()
                        nametxt = alltext(name)
                        print "nametxt:", nametxt, "link:"
                        for reg, variant in [
                            (r"Aerodrome.*Chart.*", ""),
                            (r"Landing.*Chart.*", "landing"),
                            (r".*Parking.*Chart.*", "parking"),
                            (r".*Visual.*Approach.*|.*\bVAC\b.*", "vac")
                        ]:
                            if re.match(reg, nametxt):
                                for a in page.xpath(".//a"):
                                    print "linklabel", a.text
                                    print "attrib:", a.attrib
                                    href = a.attrib['href']
                                    print "Bef repl", href
                                    if href.lower().endswith("pdf"):
                                        href = href.replace(
                                            "../../graphics",
                                            "/%s/graphics" % (airac_date, ))
                                        print "href:", href, airac_date
                                        assert href
                                        parse_landing_chart.help_plc(
                                            ad,
                                            href,
                                            icao,
                                            ad['pos'],
                                            "ee",
                                            variant=variant)
                                        """arp=ad['pos']
                                        lc=parse_landing_chart.parse_landing_chart(
                                                href,
                                                icao=icao,
                                                arppos=arp,country="ee")
                                        assert lc
                                        if lc:
                                            ad['adcharturl']=lc['url']
                                            ad['adchart']=lc
                                            hlc=True
                                            #chartblobnames.append(lc['blobname'])
                                        """
        #assert hlc
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.count("RUNWAY PHYSICAL"):
                par = h4.getparent()

                for table in par.xpath(".//table"):
                    prevnametxt = ""
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        if idx == 0:
                            fc = alltext(tr.getchildren()[0])
                            print "FC", fc
                            if not fc.count("Designations"):
                                break  #skip table
                        if idx < 2: continue
                        if len(tr.getchildren()) == 1: continue
                        print "c:", tr.getchildren(), alltexts(
                            tr.getchildren())
                        desig, trubrg, dims, strength, thrcoord, threlev = tr.getchildren(
                        )
                        rwy = re.match(r"(\d{2}[LRC]?)", alltext(desig))
                        altc = alltext(thrcoord)
                        print "Matching", altc
                        print "rwymatch:", alltext(desig)
                        m = re.match(r"\s*(\d+\.?\d*N)[\s\n]*(\d+\.?\d*E).*",
                                     altc, re.DOTALL | re.MULTILINE)
                        if m:
                            lat, lon = m.groups()
                            print "Got latlon", lat, lon
                            thrs.append(
                                dict(pos=mapper.parse_coords(lat, lon),
                                     thr=rwy.groups()[0]))

        space['freqs'] = []
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.count("ATS COMMUNICATION"):
                par = h4.getparent()
                for table in par.xpath(".//table"):
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        print "cs", repr(tr.getchildren()), alltexts(
                            tr.getchildren())
                        print len(tr.getchildren())
                        if len(tr.getchildren()) != 5:
                            if "".join(alltexts(
                                    tr.getchildren())).count(u"EMERG"):
                                continue  #Sometimes emergency freq is listed, and then it is without callsign
                        service,callsign,frequency,hours,remarks=\
                            tr.getchildren()
                        callsigntxt = alltext(callsign)
                        if idx < 2:
                            if idx == 0:
                                assert callsigntxt.strip() == "Call sign"
                            if idx == 1:
                                assert callsigntxt.strip() == "2"
                            continue
                        ftext = alltext(frequency)
                        print "matching freq", ftext
                        for freq in re.findall(ur"\b\d{3}\.\d{1,3}", ftext):
                            freqmhz = float(freq)
                            space['freqs'].append(
                                (callsigntxt.strip(), freqmhz))

        if space and 'points' in space:
            assert 'freqs' in space
            assert 'points' in space
            assert 'floor' in space
            assert 'ceiling' in space
            assert 'type' in space
            spaces.append(space)
        if thrs:
            ad['runways'] = rwy_constructor.get_rwys(thrs)

        aip_text_documents.help_parse_doc(ad,
                                          url,
                                          icao,
                                          "ee",
                                          title="General Information",
                                          category="general")

        ad['date'] = date
        ad['url'] = fetchdata.getrawurl(url, 'ee')
        print "AD:", ad
        assert 'pos' in ad
        assert 'name' in ad
        ads.append(ad)
def ee_parse_restrictions():
    spaces=[]
    p=parse.Parser("/ee_restricted_and_danger.pdf",lambda x: x,country='ee')
    for pagenr in xrange(p.get_num_pages()):        
        page=p.parse_page_to_items(pagenr)
        raws=list(sorted(page.get_by_regex(ur"EE[RD]\d+\s+.*"),key=lambda x:x.y1))+[None]
        if len(raws)>1:
            elevs=page.get_by_regex(ur"\d+\s*FT\s*MSL|FL\s*\d+")
            assert elevs
            elevcol=min(elev.x1 for elev in elevs)
            assert elevcol!=100
            for cur,next in izip(raws[:-1],raws[1:]):
                #if cur.text.count("Tunnus, nimi ja sivurajat"): continue #not a real airspace
                space=dict()
                if next==None:
                    y2=100
                else:
                    y2=next.y1-1.75
                name=cur.text.strip()
                space['name']=name
                

            
                areaspecprim=page.get_lines(page.get_partially_in_rect(cur.x1+0.01,cur.y2+0.05,elevcol-2,y2),
                                            fudge=.25)
                #print "areaspecprim:\n","\n".join(areaspecprim)
                areaspec=[]
                for area in areaspecprim:
                    print "area in ",area
                    area=area.replace(u"–","-")
                    if len(areaspec) and area.strip()=="": break
                    area=re.sub(ur"\w-$","",area)
                    areaspec.append(area)
                #print "Y-interval:",cur.y1,y2,"next:",next
                #print "Name:",space['name']
                #print "areaspec:",areaspec
                inp=" ".join(areaspec)
                #print inp
                #raw_input()
                
                tpoints=mapper.parse_coord_str(inp,context='estonia')
                if name.startswith("EER1"): 
                    tseaborder="592842N 0280054E - 593814N 0273721E - 593953N 0265728E - 594513N 0264327E"
                    seapoints=mapper.parse_coord_str(tseaborder)
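                    # EER1's AIP text only follows the land border, so the hardcoded
                    # sea border above is spliced into the parsed polygon wherever a
                    # parsed point comes within close()'s distance threshold of one
                    # of the sea border's endpoints.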
                    cont=None      
                    points=[]
                    def close(a,b):
                        bearing,dist=mapper.bearing_and_distance(
                                    mapper.from_str(a),mapper.from_str(b))
                        #print (a,b),dist
                        return dist<1.0
                    for idx,point in enumerate(tpoints):
                        points.append(point)    
                        if close(point,seapoints[0]):
                            print "WAS CLOSE",point,seapoints[0]
                            points.extend(seapoints[1:-1])
                            for idx2,point in enumerate(tpoints[idx+1:]):
                                if close(point,seapoints[-1]):
                                    points.extend(tpoints[idx+1+idx2:])
                                    break
                            else:
                                raise Exception("Couldn't find seaborder end")
                            break                    
                    else:
                        raise Exception("Couldn't find seaborder")
                else:
                    points=tpoints
                space['points']=points
                vertitems=page.get_partially_in_rect(elevcol,cur.y1+0.05,elevcol+8,y2+1.5)
                vertspec=[]
                for v in page.get_lines(vertitems):
                    if v.strip()=="": continue
                    if v.strip().count("Lennuliiklusteeninduse AS"): 
                        continue
                    vertspec.append(v.strip())
                
                print "vertspec:",vertspec
                assert len(vertspec)==2
                ceiling,floor=vertspec
                
                if mapper.parse_elev(floor)>=9500 and mapper.parse_elev(ceiling)>=9500:
                    continue
                
                space['ceiling']=ceiling
                space['floor']=floor
                space['type']='R'
                space['freqs']=[]
                spaces.append(space)
                


    spaces.append(dict(
        name="EE TSA 1",
        ceiling="UNL",
        floor="5000 FT GND",
        points=mapper.parse_coord_str(u""" 
            594500N 0255000E - 594500N 0261800E -
            592100N 0265800E - 591200N 0261200E -
            591600N 0255400E - 594500N 0255000E"""),
        type="TSA",
        date=datetime(2011,03,25),
        freqs=[]))
    return spaces
Example #39
def ep_parse_tma():
    spaces = []
    pages, date = miner.parse('/_Poland_EP_ENR_2_1_en.pdf',
                              country='ep',
                              usecache=True,
                              maxcacheage=86400 * 7)

    for nr, page in enumerate(pages):
        #if nr!=1: continue
        #print "page",nr
        #print page.items
        desigs = page.get_by_regex(ur".*DESIGNATION AND LATERAL.*", re.DOTALL)
        for desig, next in izip(desigs, desigs[1:] + [None]):

            if nr == 0:
                #FIR

                uwagi = page.get_by_regex_in_rect(ur".*UWAGI\s*/\s*REMARKS.*",
                                                  0, desig.y2, 100, 100,
                                                  re.DOTALL)[0]
                coords = page.get_lines2(
                    page.get_partially_in_rect(0, desig.y2 + 0.5,
                                               desig.x2 + 10, uwagi.y1 - 0.5))

                raw = "\n".join(coords)
                #print "Raw:\n",raw
                d = md5.md5(raw.encode('utf8')).hexdigest()
                assert d == "f336800a8183f1360415d2afef38e9ae"
                #print "Md5-digest",d
                #/further along the state border to the point 54°36’14.03”N 019°24’15.02”E -

                raw = fixup(u"""
54°27’28.03”N 019°38’24.05”E -
54°36’14.03”N 019°24’15.02”E -
55°50’58.98”N 017°32’52.80”E -
54°54’58.84”N 015°51’52.92”E -
54°55’00.00”N 015°08’07.00”E -
/from this point the arc of 30 km radius centred at point 55°04’04”N 014°44’48”E -
54°55’00”N 014°21’27”E - 
54°07’38”N 014°15’17”E -
54°07’34”N 014°12’05”E -
53°59’16”N 014°14’32”E -
53°55’40”N 014°13’34”E -
<hack_longway_around_border>/further along the state border to the point 542615N 0194751E
                """)

                ##print "rw:",raw
                fir = mapper.parse_coord_str(raw, context='poland')
                fir_context = [
                    fir
                ]  #In principle, a FIR could consist of multiple non-overlapping regions. In this case, the list here would contain more than one list of points
                #print fir
                #sys.exit(1)

                spaces.append(
                    dict(points=fir,
                         name="WARSZAWA FIR",
                         icao="EPWW",
                         floor="GND",
                         ceiling="-",
                         freqs=[],
                         type="FIR",
                         date=date))
                continue

            areas = page.get_partially_in_rect(50, desig.y1 - 3, 100,
                                               desig.y1 - 0.5)
            #print "partially: <%s>"%(areas,)
            if len(areas) == 0:
                #print "Found continuation of area:",area
                pass
            else:
                lines = []
                for s in reversed(page.get_lines2(areas)):
                    if s.y1 >= desig.y1: break
                    if re.match("\d+ \w{3} 2[01]\d{2}", s):
                        break
                    if re.match(ur"\s*AIP\s*POLAND\s*", s):
                        #not real area.
                        break
                    if s.count("Responsibility boundary within SECTOR"):
                        lines = []  #not real area name
                        break
                    m = re.match(".*\d+\.?\d*\s*([\w\s()]+)\s*$", s,
                                 re.UNICODE)
                    if m:
                        print "matched name", s, "as: <%s>" % (m.groups())
                        lines = [m.groups()[0]]
                        break
                    lines.append(s.strip())

                if len(lines) == 0:
                    pass
                    #print "Continuation of area:",area
                else:
                    area = " ".join(lines)
                    print "areastr:", area

            print "Parsing area\n-------------------------------------------------\n\n", area
            uwagis = page.get_by_regex_in_rect(ur".*UWAGI/REMARKS.*", 0,
                                               desig.y2 + 1, 100, 100,
                                               re.DOTALL)
            y2 = 100
            if len(uwagis):
                #print "Uwagi y1:",uwagis[0].y1
                y2 = min(uwagis[0].y1 - 0.1, y2)
            if next:
                y2 = min(next.y1, y2)
                #print "next.y1",next.y1
            #print "End of desig",y2
            #print desig
            units = page.get_by_regex_in_rect(ur".*UNIT PROVIDING.*", desig.x2,
                                              desig.y1, 100, desig.y2,
                                              re.DOTALL)
            if len(units) == 0: continue
            unit, = units
            vertlim, = page.get_by_regex_in_rect(ur".*VERTICAL LIMITS.*",
                                                 desig.x2, desig.y1, 100,
                                                 desig.y2, re.DOTALL)
            freq, = page.get_by_regex_in_rect(ur".*FREQUENCY.*", desig.x2,
                                              desig.y1, 100, desig.y2,
                                              re.DOTALL)

            #print "Looking in ",desig.y2+0.5,y2
            desigs = page.get_partially_in_rect(0, desig.y2 + 0.5,
                                                desig.x2 + 1, y2 - 0.8)
            #print "desigs,",repr(desigs)
            """
            def clump(desigs):
                out=[]
                y1=1e30
                y2=None
                for desig in desigs:
                    if y2!=None:
                        delta=desig.y1-y2
                        if delta>
                    y1=min(desig.y1,y1)
                    y2=max(desig.y2,y2)
                    out.append(desig.text)
            """
            #last_curfreq=None
            #out=[]

            if re.match(ur".*ATS\s*SERVICES\s*DELEGATION.*", area):
                break

            raws = []
            found_x1 = None
            for sub in desigs:
                #print "\n\n-> y2",y2," cur sub:",sub.y1
                if sub.y1 >= y2:
                    break
                wholerow = page.get_lines2(
                    page.get_partially_in_rect(0, sub.y1 + 0.25, 100,
                                               sub.y2 - 0.25))
                wholerowstr = " ".join(wholerow)
                #print "Parse:<%s>"%(wholerowstr,)
                if re.match(ur".*\d+\.\d+\s+[\w\s*]+CONTROL\s*AREA\s*$",
                            wholerowstr, re.UNICODE):
                    break
                if re.match(ur".*\d+\s+ATS\s*SERVICES\s*DELEGATION.*",
                            wholerowstr, re.UNICODE):
                    break
Example #41
                                ur".*The\s*line\s*joining.*", text):
                            continue
                        if not seenreal and text.endswith("following points:"):
                            continue
                        if not seenreal and text == "points:":
                            continue
                    if text.endswith("E"):
                        text = text + " - "
                    seenreal = True
                    coords.append(text)
                    last = sub
                pass
            assert points == None
            coordstr = fixup(" ".join(coords))
            print "Raw coords:", coordstr
            points = mapper.parse_coord_str(coordstr)
            assert ceiling
            assert floor
            assert ctrname
            spaces.append(
                dict(name=ctrname,
                     points=points,
                     type="CTR",
                     ceiling=ceiling,
                     floor=floor,
                     freqs=freqs))

            #not first page:
    assert points != None
    return dict(
        name=name,
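
# A minimal standalone sketch of the coordinate-row joining seen above: rows
# ending in an "E" longitude get a " - " separator appended so the joined
# string is in the form mapper.parse_coord_str accepts. Helper name and input
# rows are invented.
def join_coord_rows(rows):
    coords = []
    for text in rows:
        if text.endswith("E"):
            text = text + " - "
        coords.append(text)
    return " ".join(coords).strip().rstrip("-").strip()

print join_coord_rows(["591111N 0175222E", "590908N 0175408E"])
# -> 591111N 0175222E -  590908N 0175408E
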
Beispiel #42
0
def fi_parse_airfield(icao=None):
    spaces=[]
    ad=dict()
    assert icao!=None
    ad['icao']=icao
    sigpoints=[]
    #https://ais.fi/ais/eaip/pdf/aerodromes/EF_AD_2_EFET_EN.pdf
    #https://ais.fi/ais/eaip/aipcharts/efet/EF_AD_2_EFET_VAC.pdf
    #vacp=parse.Parser("/ais/eaip/aipcharts/%s/EF_AD_2_%s_VAC.pdf"%(icao.lower(),icao),lambda x: x,country="fi")
    def remove_italics(x):
        return x.replace("<i>","").replace("</i>","")
    p=parse.Parser("/ais/eaip/pdf/aerodromes/EF_AD_2_%s_EN.pdf"%(icao,),remove_italics,country="fi")


    #The following doesn't actually work, since Finnish VAC are bitmaps!!! :-(
    if 0:
        vacpage=vacp.parse_page_to_items(0)
        repp=vacpage.get_by_regex("\s*REPORTING\s*POINTS\s*")
        assert len(repp)>0
        for item in repp:
            lines=iter(vacpage.get_lines(vacpage.get_partially_in_rect(item.x1,item.y2+0.1,100,100)))
            for line in lines:
                uprint("Looking for reporting points:%s"%(line,))
                name,lat,lon=re.match(ur"([A-ZÅÄÖ\s ]{3,})\s*([ \d]+N)\s*([ \d]+E).*",line).groups()
                sigpoints.append(dict(
                    name=icao+" "+name.strip(),
                    kind="reporting",
                    pos=mapper.parse_coords(lat.replace(" ",""),lon.replace(" ",""))))




    page=p.parse_page_to_items(0)
    nameregex=ur"%s\s+-\s+([A-ZÅÄÖ\- ]{3,})"%(icao,)
    for item in page.get_by_regex(nameregex):
        #print "fontsize:",item.fontsize
        assert item.fontsize>=14
        ad['name']=re.match(nameregex,item.text).groups()[0].strip()
        break
    for item in page.get_by_regex(ur".*ELEV\s*/\s*REF.*"):
        lines=page.get_lines(page.get_partially_in_rect(0,item.y1+0.1,100,item.y2-0.1))
        for line in lines:
            print "Line:",line
            ft,=re.match(".*ELEV.*?([\d\.]+)\s*FT.*",line).groups()
            assert not 'elev' in ad
            ad['elev']=float(ft)
        

        
    for item in page.get_by_regex(ur"Mittapisteen.*sijainti"):
        lines=page.get_lines(page.get_partially_in_rect(item.x1,item.y1,100,item.y2))        
        for line in lines:
            for crd in mapper.parsecoords(line):
                assert not ('pos' in ad)
                ad['pos']=crd
                

    
    parse_landing_chart.help_plc(ad,
        "/ais/eaip/aipcharts/%s/EF_AD_2_%s_ADC.pdf"%(icao.lower(),icao.upper()),
        icao,ad['pos'],country='fi'
                        )
    parse_landing_chart.help_plc(ad,
        "/ais/eaip/aipcharts/%s/EF_AD_2_%s_VAC.pdf"%(icao.lower(),icao.upper()),
        icao,ad['pos'],country='fi',variant='VAC'
                        )
    parse_landing_chart.help_plc(ad,
        "/ais/eaip/aipcharts/%s/EF_AD_2_%s_LDG.pdf"%(icao.lower(),icao.upper()),
        icao,ad['pos'],country='fi',variant='landing'
                        )

    parse_landing_chart.help_plc(ad,
        "/ais/eaip/aipcharts/%s/EF_AD_2_%s_APDC.pdf"%(icao.lower(),icao.upper()),
        icao,ad['pos'],country='fi',variant='parking'
                        )
    
    aip_text_documents.help_parse_doc(ad,"/ais/eaip/pdf/aerodromes/EF_AD_2_%s_EN.pdf"%(icao.upper(),),
                        icao,"fi",title="General Information",category="general")

                                
    ad['runways']=[]
    thrs=[]
    freqs=[]
    for pagenr in xrange(p.get_num_pages()):
        page=p.parse_page_to_items(pagenr)
        if page==None: continue
        for item in page.get_by_regex("\s*RUNWAY\s*PHYSICAL\s*CHARACTERISTICS\s*"):
            lines=page.get_lines(page.get_partially_in_rect(0,item.y2+0.1,100,100))
            for line in lines:
                if re.match(ur"AD\s+2.13",line): break
                m=re.match(ur".*?(RWY END)?\s*\*?(\d{6}\.\d+N)\s*(\d{6,7}\.\d+E).*",line)
                if not m:continue
                rwyend,lat,lon=m.groups()
                rwytxts=page.get_lines(page.get_partially_in_rect(0,line.y1,12,line.y2))
                print "Rwytxts:",rwytxts
                rwytxt,=rwytxts
                uprint("rwytext:",rwytxt)
                rwy,=re.match(ur"\s*(\d{2}[LRCM]?)\s*[\d.]*\s*",rwytxt).groups()
                have_thr=False
                for thr in thrs:
                    if thr['thr']==rwy:
                        have_thr=True
                if rwyend!=None and have_thr:
                    continue
                thrs.append(dict(pos=mapper.parse_coords(lat,lon),thr=rwy))
        
        for item in page.get_by_regex("ATS AIRSPACE"):
            lines=iter(page.get_lines(page.get_partially_in_rect(0,item.y2+0.1,100,100)))
            spaces=[]
            line=lines.next()
            while True:
                while line.strip()=="":
                    line=lines.next()
                print "Read line:",line
                if line.count("Vertical limits"):
                    break                            
                m=re.match(ur".*?/\s+Designation and lateral limits\s*(.*\b(?:CTR|FIZ)\b.*?)\s*:?\s*$",line)
                if not m:
                    m=re.match(ur"\s*(.*\b(?:CTR|FIZ)\b.*?)\s*:",line)
                    #print "Second try:",m
                    
                spacename,=m.groups()
                #print "Got spacename:",spacename
                assert spacename.strip()!=""
                coords=[]
                while True:
                    line=lines.next()
                    print "Further:",line                        
                    
                    if line.count("Vertical limits"):
                        print "Breaking"
                        break                            
                    if not re.search(ur"[\d ]+N\s*[\d ]+E",line) and  \
                        not re.search(ur"circle|cent[red]{1,5}|pitkin|point|equal\s*to",line):
                        print "Breaking"
                        break
                    coords.append(line)
                    
                areaspec="".join(coords)
                
                def fixup(m):
                    lat,lon=m.groups()
                    return lat.replace(" ","")+" "+lon.replace(" ","")
                areaspec=re.sub(ur"([\d ]+N)\s*([\d ]+E)",fixup,areaspec)
                
                areaspec=re.sub(ur"\(.*/\s*equal\s*to\s*Malmi\s*CTR\s*lateral\s*limits\)","",areaspec)
                #print "Fixed areaspec",areaspec
                #if icao=="EFKS":
                #    areaspec=areaspec.replace("6615 28N","661528N")
#Error! Restriction areas!
                spaces.append(dict(
                    name=spacename,
                    type="CTR",
                    points=mapper.parse_coord_str(areaspec)))
                if line.count("Vertical limits"):
                    #print "Breaking"
                    break                            
            while not line.count("Vertical limits"):
                line=lines.next()
            #print "Matching veritcal limits--------------------------------"
            oldspaces=spaces
            spaces=[]
            for space in oldspaces:
                if space['name'].count("/"):
                    a,b=space['name'].split("/")
                    spaces.append(dict(space,name=a.strip()))
                    spaces.append(dict(space,name=b.strip()))
                else:
                    spaces.append(space)
            missing=set([space['name'] for space in spaces])
            while True:
                for space in spaces:

                    for it in xrange(3):  
                        cand=space['name']
                        if it==1:
                            if cand.count("CTR"):
                                cand="CTR"
                            if cand.count("FIZ"):
                                cand="FIZ"
                        if it==2:
                            if cand.count("CTR"):
                                cand=r"CTR\s*/[\sA-Z]+"
                            if cand.count("FIZ UPPER"):
                                cand="FIZ UPPER"
                            if cand.count("FIZ LOWER"):
                                cand="FIZ LOWER"
                        m=re.match(ur".*%s\s*:([^,:-]*)\s*-\s*([^,:-]*)"%(cand,),line)
                        print "Matching ",cand," to ",line,"missing:",missing,m
                        if m: break
                        
                    if len(spaces)==1 and not m:                        
                        m=re.match(ur".*Vertical limits\s*(.*)\s*-\s*(.*)",line)
                    if m:
                        print "*****MATCH!!:::",m.groups()
                        for lim in m.groups():
                            assert lim.count(",")==0
                        space['floor'],space['ceiling']=m.groups()
                        missing.remove(space['name'])
                    #print "Missing:"
                    if len(missing)==0: break
                if len(missing)==0: break
                #print "Still missing:",missing
                line=lines.next()
            
        print "Parse f o n page",pagenr      
        for item2 in page.get_by_regex(ur".*ATS\s*COMMUNICATION\s*FACILITIES.*"):
            lines=page.get_lines(page.get_partially_in_rect(0,item2.y2+0.1,100,100))
            for line in lines:
                if line.count("RADIO NAVIGATION AND LANDING AIDS"): break
                print "Comm line:",line
                twr=re.match(ur"TWR.*(\d{3}\.\d{3})\b.*",line)
                if twr:
                    freqs.append(('TWR',float(twr.groups()[0]))) 
                atis=re.match(ur"ATIS.*(\d{3}\.\d{3})",line)
                if atis:
                    freqs.append(('ATIS',float(atis.groups()[0])))
                continue
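
# A standalone sketch (helper name and sample coordinates invented) of the
# fixup step in the ATS-AIRSPACE parser above: the Finnish AIP prints
# coordinates with embedded spaces ("60 19 25N 024 57 10E"), and the regex
# collapses each latitude/longitude pair into the compact form that
# mapper.parse_coord_str expects.
import re

def squeeze_coords(areaspec):
    def fixup(m):
        lat,lon=m.groups()
        return lat.replace(" ","")+" "+lon.replace(" ","")
    return re.sub(ur"([\d ]+N)\s*([\d ]+E)",fixup,areaspec)

print squeeze_coords(u"60 19 25N 024 57 10E - 60 17 00N 025 02 00E")
# -> 601925N 0245710E - 601700N 0250200E
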
Beispiel #43
0
            kind, name = re.match("EP (TSA|TRA|TFR) ([\d\w]+)",
                                  tra.text).groups()

            def fix_coords(s):
                def fixer(m):
                    a, b, c, d, e, f, g, h = m.groups()
                    return "%02d%02d%02d%s %03d%02d%02d%s - " % (
                        int(a), int(b), int(c), d, int(e), int(f), int(g), h)

                return re.sub(
                    ur"(\d{2,3})°(\d{2})'(\d{2})''([NS])\s*(\d{2,3})°(\d{2})'(\d{2})''([EW])",
                    fixer, s)

            coordstr2 = fix_coords("".join(o)).rstrip().rstrip("-")
            print "COordstr:", coordstr2
            spaces.append(
                dict(name="EP %s %s" % (kind, name),
                     points=mapper.parse_coord_str(coordstr2,
                                                   context="poland"),
                     ceiling=ceiling,
                     floor=floor,
                     type="TSA",
                     freqs=[]))
    return spaces


if __name__ == '__main__':
    for space in ep_parse_tra():
        print "space", space
Beispiel #44
0
                        coords.append(line)
                    else:
                        if line.count("SEKTOR"):
                            subname = lines[0].strip()

                raw = " ".join(coords)

                def s(x):
                    return x.replace(" ", ur"\s*")

                #raw=re.sub(s(ur"Linia łącząca następujące punkty : / The line joining the following points :? "),               #           "",raw)

                #print "raw area:<%s>"%(repr(raw),)

                points = mapper.parse_coord_str(raw,
                                                context="poland",
                                                fir_context=fir_context)

                if len(curvert) == 0:
                    lastspace = spaces[-1]
                    assert len(curunit) == 0
                    assert len(curfreq) == 0
                    lastspace['points'].extend(points)
                else:
                    curvert_out = []
                    for cur in curvert:
                        cur = cur.strip()
                        if cur.endswith("C"):
                            cur = cur[:-1].strip()
                        if not cur: continue
                        curvert_out.append(cur)
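
# A minimal sketch of the s() helper above: it turns a literal phrase into a
# whitespace-tolerant regex by replacing every space with \s*, which helps
# when pdftotext collapses or doubles spaces unpredictably. The sample phrase
# is invented.
import re

def s(x):
    return x.replace(" ", ur"\s*")

pattern = s(ur"The line joining the following points")
print re.search(pattern, u"The  line joining the\nfollowing points") is not None
# -> True
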
Beispiel #45
0
def parse_all_tma():
    def fixgote(raw):
        #Fix illogical composition of Göteborg TMA description. 2010 04 02
        did_replace=[0]
        def replacer(args):
            uprint(args.groups())
            y,x,w,h,font=args.groups()
            uprint(w,h)
            assert int(w)>=260 and int(w)<420
            assert int(h)>=6 and int(h)<=15
            f=float(w)/270.0
            x1=x
            y1=y
            w1=80
            h1=h

            x2=168*f
            y2=y
            w2=150*f
            h2=h
            did_replace[0]+=1
            repl="""<text top="%s" left="%s" width="%s" height="%s" font="%s">Part of GÖTEBORG TMA</text>
                           <text top="%s" left="%s" width="%s" height="%s" font="%s">584558N 0122951E - 584358N 0130950E - </text>"""%(
                           y1,x1,w1,h1,font,y2,x2,w2,h2,font)
            uprint("\n======================================\nReplacement:\n",repl)
            return repl
        raw=re.sub(r"""<text top="(\d+)" left="(\d+)" width="(\d+)" height="(\d+)" font="(\d+)">\s*Part of GÖTEBORG TMA  584558N 0122951E - 584358N 0130950E - </text>""",replacer,raw)
        assert did_replace[0]==1
        return raw
    p=parse.Parser("/AIP/ENR/ENR 2/ES_ENR_2_1_en.pdf")

    res=[]    
    found=False
    last_sector=dict()
    for pagenr in xrange(0,p.get_num_pages()):
        page=p.parse_page_to_items(pagenr)
        #print "Num acc-sec:",len(page.get_by_regex(r".*ACC.sectors.*"))
        #print "Num and acc-sec:",len(page.get_by_regex(r".*and\s+ACC.sectors.*"))
        
        sect=(len(page.get_by_regex(r".*ACC.sectors.*"))>0 and len(page.get_by_regex(r".*and\s+ACC.sector.*"))==0)
        #print "ACC-sector2:",sect        
        if found or page.get_by_regex(r".*Terminal Control Areas.*") or sect:
            found=True
        else:
            continue
        #if sect:        
        parsed=parse_page(p,pagenr,"TMA" if not sect else "sector",last_sector=last_sector)
        res.extend(parsed)
        
    res.append(dict(
        name="SWEDEN FIR",
        icao="ESAA",
        floor='GND',
        ceiling='-',
        freqs=[],
        type='FIR',
        date=datetime(2011,4,9),
        points=mapper.parse_coord_str("""
690336N 0203255E - 
Along the common X/Y state boundary to 653148N 0240824E -
644100N 0225500E - 633700N 0213000E -
632830N 0204000E - 631000N 0201000E -
614000N 0193000E - 610000N 0191905E -
601803N 0190756E - 601130N 0190512E -
593346N 0195859E - 591524N 0203239E -
590000N 0210000E - 573410N 0200900E -
570000N 0195000E - 555100N 0173300E -
545500N 0155200E - 545500N 0150807E -
clockwise along an arc centred on 550404N 0144448E and with radius 16.2 NM -
545500N 0142127E - 545500N 0125100E -
552012N 0123827E - Along the common X/Y state boundary to 561253N 0122205E -
583000N 0103000E - 584540N 0103532E -
585332N 0103820E - Along the common X/Y state boundary to 690336N 0203255E
                                        
""",context="sweden")))
        
    for pa in res:
        pretty(pa)
    return res
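
# A side note on the did_replace=[0] idiom in fixgote above: Python 2 closures
# cannot rebind an outer local (there is no 'nonlocal'), so a one-element list
# serves as a mutable cell that the re.sub callback can increment, and the
# assert afterwards checks the rewrite fired exactly once. Minimal sketch with
# invented pattern and input.
import re

def replace_once(pattern, repl, raw):
    did_replace = [0]
    def replacer(m):
        did_replace[0] += 1
        return repl
    out = re.sub(pattern, replacer, raw)
    assert did_replace[0] == 1
    return out

print replace_once(r"b+", "B", "abbbc")
# -> aBc
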
Beispiel #46
0
def ep_parse_tma():
    spaces=[]
    pages,date=miner.parse('/_Poland_EP_ENR_2_1_en.pdf',
                           country='ep',usecache=True,
                           maxcacheage=86400*7
                           )
    
    
    
    for nr,page in enumerate(pages):
        #if nr!=1: continue
        #print "page",nr
        #print page.items
        desigs=page.get_by_regex(ur".*DESIGNATION AND LATERAL.*",re.DOTALL)
        for desig,next in izip(desigs,desigs[1:]+[None]):
            
            
            
            if nr==0:
                #FIR
                
                uwagi=page.get_by_regex_in_rect(ur".*UWAGI\s*/\s*REMARKS.*",
                                0,desig.y2,100,100,re.DOTALL)[0]
                coords=page.get_lines2(page.get_partially_in_rect(
                        0,desig.y2+0.5,desig.x2+10,uwagi.y1-0.5))
                
                raw="\n".join(coords)
                #print "Raw:\n",raw
                d=md5.md5(raw.encode('utf8')).hexdigest()
                assert d=="f336800a8183f1360415d2afef38e9ae"
                #print "Md5-digest",d
#/further along the state border to the point 54°36’14.03”N 019°24’15.02”E -

                raw=fixup(u"""
54°27’28.03”N 019°38’24.05”E -
54°36’14.03”N 019°24’15.02”E -
55°50’58.98”N 017°32’52.80”E -
54°54’58.84”N 015°51’52.92”E -
54°55’00.00”N 015°08’07.00”E -
/from this point the arc of 30 km radius centred at point 55°04’04”N 014°44’48”E -
54°55’00”N 014°21’27”E - 
54°07’38”N 014°15’17”E -
54°07’34”N 014°12’05”E -
53°59’16”N 014°14’32”E -
53°55’40”N 014°13’34”E -
<hack_longway_around_border>/further along the state border to the point 542615N 0194751E
                """)
                
                ##print "rw:",raw 
                fir=mapper.parse_coord_str(raw,context='poland')
                #In principle, a FIR could consist of multiple non-overlapping
                #regions; in that case, the list here would contain more than
                #one list of points.
                fir_context=[fir]
                #print fir
                #sys.exit(1)
                
                spaces.append(                            
                    dict(
                         points=fir,
                         name="WARSZAWA FIR",
                         icao="EPWW",
                         floor="GND",
                         ceiling="-",
                         freqs=[],
                         type="FIR",
                         date=date
                         ))
                continue

            areas=page.get_partially_in_rect(50,desig.y1-3,100,desig.y1-0.5)
            #print "partially: <%s>"%(areas,)
            if len(areas)==0:
                #print "Found continuation of area:",area
                pass
            else:
                lines=[]
                for s in reversed(page.get_lines2(areas)):
                    if s.y1>=desig.y1: break
                    if re.match("\d+ \w{3} 2[01]\d{2}",s):
                        break
                    if re.match(ur"\s*AIP\s*POLAND\s*",s):
                        #not real area.
                        break
                    if s.count("Responsibility boundary within SECTOR"):
                        lines=[] #not real area name
                        break
                    m=re.match(".*\d+\.?\d*\s*([\w\s()]+)\s*$",s,re.UNICODE)
                    if m:
                        print "matched name",s,"as: <%s>"%(m.groups())
                        lines=[m.groups()[0]]
                        break
                    lines.append(s.strip())
                    
                if len(lines)==0:
                    pass
                    #print "Continuation of area:",area
                else:
                    area=" ".join(lines)
                    print "areastr:",area 
            
            print "Parsing area\n-------------------------------------------------\n\n",area            
            uwagis=page.get_by_regex_in_rect(ur".*UWAGI/REMARKS.*",
                            0,desig.y2+1,100,100,re.DOTALL)
            y2=100
            if len(uwagis):
                #print "Uwagi y1:",uwagis[0].y1
                y2=min(uwagis[0].y1-0.1,y2)
            if next:
                y2=min(next.y1,y2)
                #print "next.y1",next.y1
            #print "End of desig",y2
            #print desig
            units=page.get_by_regex_in_rect(ur".*UNIT PROVIDING.*",
                                desig.x2,desig.y1,100,desig.y2,re.DOTALL)
            if len(units)==0: continue
            unit,=units
            vertlim,=page.get_by_regex_in_rect(ur".*VERTICAL LIMITS.*",
                                desig.x2,desig.y1,100,desig.y2,re.DOTALL)
            freq,=page.get_by_regex_in_rect(ur".*FREQUENCY.*",
                                desig.x2,desig.y1,100,desig.y2,re.DOTALL)
            
            #print "Looking in ",desig.y2+0.5,y2
            desigs=page.get_partially_in_rect(0,desig.y2+0.5,desig.x2+1,y2-0.8)
            #print "desigs,",repr(desigs)
            """
            def clump(desigs):
                out=[]
                y1=1e30
                y2=None
                for desig in desigs:
                    if y2!=None:
                        delta=desig.y1-y2
                        if delta>
                    y1=min(desig.y1,y1)
                    y2=max(desig.y2,y2)
                    out.append(desig.text)
            """     
            #last_curfreq=None
            #out=[]
            
            if re.match(ur".*ATS\s*SERVICES\s*DELEGATION.*",area):
                break
            
            raws=[]
            found_x1=None
            for sub in desigs:
                #print "\n\n-> y2",y2," cur sub:",sub.y1
                if sub.y1>=y2:
                    break
                wholerow=page.get_lines2(page.get_partially_in_rect(0,sub.y1+0.25,100,sub.y2-0.25))
                wholerowstr=" ".join(wholerow)
                #print "Parse:<%s>"%(wholerowstr,)
                if re.match(ur".*\d+\.\d+\s+[\w\s*]+CONTROL\s*AREA\s*$",wholerowstr,re.UNICODE):
                    break
                if re.match(ur".*\d+\s+ATS\s*SERVICES\s*DELEGATION.*",wholerowstr,re.UNICODE):
                    break
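
# A minimal sketch of the izip(desigs, desigs[1:] + [None]) pattern used
# above: each table header is paired with its successor (None for the last),
# so the next header's y-coordinate can bound the rows belonging to the
# current one. Header strings invented.
from itertools import izip

headers = ["TMA A", "TMA B", "TMA C"]
for cur, next in izip(headers, headers[1:] + [None]):
    print cur, "->", next
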
Beispiel #47
0
 break
 else:
     raise Exception("No limitstr")
 
 cstr=[]
 spacename=coordstr[0]
 assert spacename=="CTR"
 for sub in coordstr[1:]:
     cstr.append(sub.strip().rstrip("."))
 def fixfunc(m):
     return "".join(m.groups())
 raw=re.sub(ur"(\d{2,3})\s*(\d{2})\s*(\d{2})\s*([NSEW])",
                                      fixfunc,
                                      "".join(cstr)).replace(","," - ")
 print "parsing raw:",raw
 points=mapper.parse_coord_str(raw,context='lithuania')
                                      
 print "Limitstr",limitstr
 floor,ceiling=re.match(ur"(.*)\s*to\s*(.*)",limitstr).groups()
 mapper.parse_elev(floor)
 mapper.parse_elev(ceiling)
 
 spacenamestem=spacename.strip()
 if spacenamestem.endswith("CTR"):
     spacenamestem=spacenamestem[:-3].strip()
 if spacenamestem.endswith("FIZ"):
     spacenamestem=spacenamestem[:-3].strip()
 #construct names
 newfreqs=[]
 for serv,freq in freqs:
     serv=serv.strip()
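
# A standalone sketch (sample row invented) of the fixfunc normalization in
# the Lithuanian CTR fragment above: spaced degree/minute/second groups such
# as "54 38 00 N" are glued back together, and commas between points become
# " - " separators before mapper.parse_coord_str is called.
import re

def fixfunc(m):
    return "".join(m.groups())

raw = re.sub(ur"(\d{2,3})\s*(\d{2})\s*(\d{2})\s*([NSEW])",
             fixfunc,
             u"54 38 00 N 025 17 00 E, 54 36 00 N 025 20 00 E").replace(","," - ")
print raw
# -> 543800N 0251700E -  543600N 0252000E
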
Beispiel #48
0
                        if line.endswith("E"):
                            line+=" - "
                        coords.append(line)
                    else:
                        if line.count("SEKTOR"):
                            subname=lines[0].strip()
                        
                raw=" ".join(coords)
                def s(x):
                    return x.replace(" ",ur"\s*")
                #raw=re.sub(s(ur"Linia łącząca następujące punkty : / The line joining the following points :? "),               #           "",raw)
                
                #print "raw area:<%s>"%(repr(raw),)
                
                
                points=mapper.parse_coord_str(raw,context="poland",fir_context=fir_context)

                if len(curvert)==0:
                    lastspace=spaces[-1]
                    assert len(curunit)==0
                    assert len(curfreq)==0
                    lastspace['points'].extend(points)
                else:                    
                    curvert_out=[]
                    for cur in curvert:
                        cur=cur.strip()
                        if cur.endswith("C"):
                            cur=cur[:-1].strip()
                        if not cur:continue
                        curvert_out.append(cur)
                    print "Raw curvert_out:",repr(curvert_out)
Beispiel #49
0
def find_areas(page):
    areastarts = sorted(list(page.get_by_regex(r".*?\d{4,6}[NS].*")) +
                        list(page.get_by_regex(r".*?\d{5,7}[EW].*")),
                        key=lambda x: (x.y1, x.x1))
    #for area in areastarts:
    #    print "Area font:",area.fontsize,area.font,"bolditalic:",area.bold,area.italic
    #    print " - Area:",area.text

    print "Found %d area-lines on page" % (len(areastarts), )
    print areastarts
    if len(areastarts) == 0: return
    idx = 0
    cury = None
    while True:
        firstdiff = None
        process = []
        miny = None
        maxy = None
        while idx < len(areastarts):
            process.append(areastarts[idx])
            cury = areastarts[idx].y1

            if miny == None or maxy == None:
                miny = cury
                maxy = cury
            miny = min(areastarts[idx].y1, miny)
            maxy = max(areastarts[idx].y2, maxy)

            #print "Diff:",diff,"firstdiff:",firstdiff,"delta:",diff-firstdiff if diff!=None and firstdiff!=None else ''
            idx += 1
            if idx < len(areastarts):
                diff = areastarts[idx].y1 - cury
                if diff != 0:
                    if firstdiff == None: firstdiff = diff
                #print "Diff:",diff
                if diff > 6.0:
                    #print "Diff too big"
                    break
                if firstdiff and diff > 1.35 * firstdiff:
                    #print "bad spacing",diff,1.5*firstdiff
                    break
        #print "Determined that these belong to one area:",process
        if len(process):
            alltext = "\n".join(page.get_lines(process))
            print "<%s>" % (alltext, )
            anyarea = re.findall(r"((?:\d{4,6}[NS]\s*\d{5,7}[EW])+)", alltext,
                                 re.DOTALL | re.MULTILINE)
            print "Matching:"
            print anyarea
            if not len(anyarea): continue
            if len(anyarea) >= 3:
                coords = parse_coord_str(" - ".join(anyarea),
                                         filter_repeats=True)
                print "AREA:"
                print coords
                print "===================================="
                assert len(coords) >= 3
                coordfontsize = process[0].fontsize
                areaname = None
                for item in reversed(
                        sorted(page.get_partially_in_rect(
                            0, 0, 100, process[0].y1),
                               key=lambda x: (x.y1, x.x1))):
                    if item.text.strip() == "": continue
                    #print "fontsize",item.fontsize,item.text,"y1:",item.y1
                    if item.fontsize > process[
                            0].fontsize or item.bold > process[
                                0].bold or item.italic > process[0].italic:
                        assert item.y1 != None
                        miny = min(item.y1, miny)
                        print "Found name: <%s>. Fonts: %d, %d, Fontsize: %s, old fontsize: %s" % (
                            item.text, item.font, process[0].font,
                            item.fontsize, process[0].fontsize)
                        prevx1 = item.x1
                        revname = []
                        for nameitem in reversed(
                                sorted(page.get_partially_in_rect(
                                    0, item.y1 + 0.01, item.x2,
                                    item.y2 - 0.01),
                                       key=lambda x: (x.x1))):
                            if prevx1 - nameitem.x2 > 3.0:
                                break
                            revname.append(nameitem.text.strip())
                        areaname = " ".join(reversed(revname))
                        break
                yield (areaname, coords, dict(y1=miny, y2=maxy))
        if idx >= len(areastarts): break
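
# A standalone sketch of the row-clustering heuristic inside find_areas: lines
# are grouped while the vertical gap stays close to the first observed gap; a
# gap over 6.0 units, or over 1.35x the first gap, starts a new cluster. The
# y values are invented.
def cluster_rows(ys, absmax=6.0, stretch=1.35):
    clusters, cur, firstdiff = [], [], None
    for y in ys:
        if cur:
            diff = y - cur[-1]
            if diff > absmax or (firstdiff and diff > stretch * firstdiff):
                clusters.append(cur)
                cur, firstdiff = [], None
            elif diff != 0 and firstdiff is None:
                firstdiff = diff
        cur.append(y)
    if cur:
        clusters.append(cur)
    return clusters

print cluster_rows([10.0, 11.0, 12.0, 30.0, 31.0])
# -> [[10.0, 11.0, 12.0], [30.0, 31.0]]
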