def ek_parse_airfield(icao): # http://www.slv.dk # raise Exception("This doesn't work - you need to click through web-interface for the links to work") # url="/Dokumenter/dsweb/Get/Document-1492/EK_AD_2_%s_en.pdf"%(icao,) data, date = getdata(url, country="ek", maxcacheage=86400 * 7) if 0: pages, date = miner.parse(url, maxcacheage=86400, country="ek", usecache=True) print icao, "bytes:", len(data)
def ek_parse_airfield(icao): #http://www.slv.dk #raise Exception("This doesn't work - you need to click through web-interface for the links to work") #url="/Dokumenter/dsweb/Get/Document-1492/EK_AD_2_%s_en.pdf"%(icao,) data,date=getdata(url,country="ek",maxcacheage=86400*7) if 0: pages,date=miner.parse(url, maxcacheage=86400, country='ek',usecache=True) print icao,"bytes:",len(data)
def ep_parse_airfields(filtericao=None):
    # Collect the ICAO codes (EPxx) of Polish aerodromes from the AIP AD 1
    # index page, by locating "ICAO CODE" headings and scanning the column
    # of text directly below each one.
    #
    # NOTE(review): `filtericao` is unused in the visible code, and the
    # collected `icaos` list is neither returned nor consumed here — the
    # function appears truncated in this chunk (presumably it goes on to
    # parse each collected aerodrome). Confirm against the full file.
    pages, date = miner.parse("/aip/openp.php?id=EP_AD_1_en",
                              maxcacheage=86400 * 7,
                              country='ep',
                              usecache=True)
    icaos = []
    print "Nr pages:", len(pages)
    for nr, page in enumerate(pages):
        # Each "ICAO CODE" heading marks a column of aerodrome codes.
        for item in page.get_by_regex(ur".*\bICAO\s*CODE\b.*"):
            print "Icao", item
            # Scan everything below the heading, within the heading's x-span
            # (coordinates are in the page geometry used by `miner`).
            for icaoitem in page.get_partially_in_rect(item.x1, item.y1 + 0.1,
                                                       item.x2, 100):
                for icao in re.findall(ur"\b(EP[A-Z]{2})\b", icaoitem.text):
                    assert len(icao) == 4
                    icaos.append(icao)
def ek_parse_airfields():
    # Intended to scrape the list of Danish (EK) aerodromes from the AIP
    # AD 1.3 document at http://www.slv.dk.
    #
    # Deliberately disabled: the unconditional raise below means everything
    # after it is dead code, kept as documentation of the intended approach.
    raise Exception("This doesn't work - you need to click through web-interface for the links to work")
    pages, date = miner.parse(
        "/Dokumenter/dsweb/Get/Document-6465/EK_AD_1_3_en.pdf",
        maxcacheage=86400, country="ek", usecache=True
    )
    icaos = []
    print "Nr pages:", len(pages)
    for nr, page in enumerate(pages):
        # Headings mentioning "Aerodrome" mark the start of each listing.
        for item in page.get_by_regex(ur".*Aerodrome.*", re.UNICODE | re.IGNORECASE):
            print "Icao", item
            # Scan the column below the heading for EKxx codes.
            for icaoitem in page.get_partially_in_rect(item.x1, item.y1 + 0.1,
                                                       item.x2, 100):
                for icao in re.findall(ur"\b(EK[A-Z]{2})\b", icaoitem.text):
                    assert len(icao) == 4
                    icaos.append(icao)
def ep_parse_airfield(icao):
    # Parse the Polish (EP) AIP AD 2 page for aerodrome *icao*: extract its
    # name, ARP position and elevation from the rendered PDF geometry.
    #
    # NOTE(review): the visible code ends right after computing `elev`, and
    # `spaces`, `points`, `ctrname`, `freqs` are initialized but never used
    # here — the function appears truncated in this chunk; confirm against
    # the full file.
    spaces = []
    pages, date = miner.parse("/aip/openp.php?id=EP_AD_2_%s_en" % (icao, ),
                              maxcacheage=86400 * 7,
                              country='ep',
                              usecache=True)
    print "parsing ", icao, date
    points = None
    ctrname = None
    freqs = []
    for nr, page in enumerate(pages):
        if nr == 0:
            # Drop tiny text fragments (height <= 0.4 in page units) —
            # these are layout noise, not real content lines.
            def filter_tiny(its):
                for it in its:
                    print "Filtering:", repr(it)
                    print "size %f of <%s>." % (it.y2 - it.y1, it.text)
                    textsize = it.y2 - it.y1
                    if textsize > 0.4:
                        yield it
            # --- Aerodrome name: appears under this heading. ---
            namehdg, = page.get_by_regex(
                ur".*AERODROME\s+LOCATION\s+INDICATOR\s+AND\s+NAME.*",
                re.DOTALL)
            subs = page.get_partially_in_rect(0, namehdg.y1 + 0.5,
                                              100, namehdg.y2 + 2.5)
            allsubs = []
            for sub in subs:
                print "Item:", repr(sub)
                print "sub", repr(sub.subs)
                allsubs.extend(sub.subs)
            print "allsubs", allsubs
            lineobjs = list(filter_tiny(allsubs))
            for lineobj in lineobjs:
                line = lineobj.text.strip()
                print "line:", line
                # Skip the bare ICAO code and the English/Polish headings.
                if line == icao:
                    continue
                if re.match(ur".*AERODROME\s*LOCATION\s*INDICATOR.*", line):
                    continue
                if re.match(
                        ur".*WSKAŹNIK\s*LOKALIZACJI\s*LOTNISKA\s*I\s*NAZWA.*",
                        line):
                    continue
                # Expect "EPXX - Name"; take the part after the dash.
                m = re.match(ur"%s\s*[-]\s*([\w\s/]+)" % (icao, ), line,
                             re.UNICODE | re.DOTALL)
                name, = m.groups()
                name = name.strip()
                break
            else:
                # for/else: no line yielded a name.
                raise Exception("No name found!")
            print "Name:", name
            # --- ARP coordinates. ---
            site, = page.get_by_regex(
                ur"ARP\s*-\s*WGS-84\s*coordinates\s*and\s*site\s*at\s*AD")
            print "site:", repr(site.text.strip())
            splat = site.text.strip().split("\n")
            print "splat:", splat
            print len(splat)
            # First line is the heading itself; coordinates follow.
            poss = splat[1:]
            print "rawpos,", poss
            for line in poss:
                m = re.match(
                    ur"(\d+)°(\d+)'(\d+)''(N)\s*(\d+)°(\d+)'(\d+)''(E).*",
                    line)
                if m:
                    pos = mapper.parsecoord("".join(m.groups()))
                    break
            else:
                # for/else: no line matched the coordinate pattern.
                raise Exception("No pos found")
            # --- Elevation: take the feet value from "NNN m (NNN ft)". ---
            elevi, = page.get_by_regex(
                ur"\s*Elevation/Reference\s*temperature\s*", re.DOTALL)
            elevft, = re.match(ur".*\d+\s+m\s*\((\d+)\s+ft\).*", elevi.text,
                               re.DOTALL).groups()
            elev = float(elevft)
def ep_parse_tma():
    # Parse the Polish (EP) AIP ENR 2.1 PDF into airspace dicts: first the
    # WARSZAWA FIR boundary (page 0), then TMA/CTA table rows on later pages.
    #
    # NOTE(review): the function is visibly truncated in this chunk — it ends
    # mid-way through the row-scanning loop and `spaces`, `raws`, `found_x1`
    # are never consumed here. Confirm against the full file.
    spaces = []
    pages, date = miner.parse('/_Poland_EP_ENR_2_1_en.pdf',
                              country='ep',
                              usecache=True,
                              maxcacheage=86400 * 7)
    for nr, page in enumerate(pages):
        #if nr!=1: continue
        #print "page",nr
        #print page.items
        # Each "DESIGNATION AND LATERAL ..." heading starts one airspace entry;
        # pair each heading with the next one to bound its vertical extent.
        desigs = page.get_by_regex(ur".*DESIGNATION AND LATERAL.*", re.DOTALL)
        for desig, next in izip(desigs, desigs[1:] + [None]):
            if nr == 0:
                # --- FIR: the first page holds the WARSZAWA FIR boundary. ---
                uwagi = page.get_by_regex_in_rect(ur".*UWAGI\s*/\s*REMARKS.*",
                                                  0, desig.y2, 100, 100,
                                                  re.DOTALL)[0]
                coords = page.get_lines2(
                    page.get_partially_in_rect(0, desig.y2 + 0.5,
                                               desig.x2 + 10, uwagi.y1 - 0.5))
                raw = "\n".join(coords)
                #print "Raw:\n",raw
                # Guard: the hard-coded boundary string below is only valid for
                # this exact document revision — fail loudly if the text changes.
                d = md5.md5(raw.encode('utf8')).hexdigest()
                assert d == "f336800a8183f1360415d2afef38e9ae"
                #print "Md5-digest",d
                #/further along the state border to the point 54°36’14.03”N 019°24’15.02”E -
                raw = fixup(u""" 54°27’28.03”N 019°38’24.05”E - 54°36’14.03”N 019°24’15.02”E - 55°50’58.98”N 017°32’52.80”E - 54°54’58.84”N 015°51’52.92”E - 54°55’00.00”N 015°08’07.00”E - /from this point the arc of 30 km radius centred at point 55°04’04”N 014°44’48”E - 54°55’00”N 014°21’27”E - 54°07’38”N 014°15’17”E - 54°07’34”N 014°12’05”E - 53°59’16”N 014°14’32”E - 53°55’40”N 014°13’34”E - <hack_longway_around_border>/further along the state border to the point 542615N 0194751E """)
                ##print "rw:",raw
                fir = mapper.parse_coord_str(raw, context='poland')
                # In principle, a FIR could consist of multiple non-overlapping
                # regions; in that case this list would hold more than one list
                # of points.
                fir_context = [fir]
                #print fir
                #sys.exit(1)
                spaces.append(
                    dict(points=fir,
                         name="WARSZAWA FIR",
                         icao="EPWW",
                         floor="GND",
                         ceiling="-",
                         freqs=[],
                         type="FIR",
                         date=date))
                continue
            # --- TMA/CTA rows: look for the area name above the heading. ---
            areas = page.get_partially_in_rect(50, desig.y1 - 3, 100, desig.y1 - 0.5)
            #print "partially: <%s>"%(areas,)
            if len(areas) == 0:
                #print "Found continuation of area:",area
                pass
            else:
                lines = []
                # Scan upwards from the heading for the area name line.
                for s in reversed(page.get_lines2(areas)):
                    if s.y1 >= desig.y1:
                        break
                    if re.match("\d+ \w{3} 2[01]\d{2}", s):
                        # Looks like an AIP effective-date stamp, not a name.
                        break
                    if re.match(ur"\s*AIP\s*POLAND\s*", s):
                        # Page header — not a real area.
                        break
                    if s.count("Responsibility boundary within SECTOR"):
                        lines = []  # not a real area name
                        break
                    m = re.match(".*\d+\.?\d*\s*([\w\s()]+)\s*$", s, re.UNICODE)
                    if m:
                        print "matched name", s, "as: <%s>" % (m.groups())
                        lines = [m.groups()[0]]
                        break
                    lines.append(s.strip())
                if len(lines) == 0:
                    pass
                    #print "Continuation of area:",area
                else:
                    area = " ".join(lines)
            # NOTE(review): when no name is found above, `area` carries over
            # from the previous iteration — an area may span table chunks;
            # verify this intent against the full file.
            print "areastr:", area
            print "Parsing area\n-------------------------------------------------\n\n", area
            uwagis = page.get_by_regex_in_rect(ur".*UWAGI/REMARKS.*",
                                               0, desig.y2 + 1, 100, 100,
                                               re.DOTALL)
            # y2 = lower bound (page coords) of this entry's data rows:
            # clipped by the remarks section and by the next heading.
            y2 = 100
            if len(uwagis):
                #print "Uwagi y1:",uwagis[0].y1
                y2 = min(uwagis[0].y1 - 0.1, y2)
            if next:
                y2 = min(next.y1, y2)
                #print "next.y1",next.y1
            #print "End of desig",y2
            #print desig
            units = page.get_by_regex_in_rect(ur".*UNIT PROVIDING.*",
                                              desig.x2, desig.y1, 100, desig.y2,
                                              re.DOTALL)
            if len(units) == 0:
                continue
            unit, = units
            vertlim, = page.get_by_regex_in_rect(ur".*VERTICAL LIMITS.*",
                                                 desig.x2, desig.y1, 100, desig.y2,
                                                 re.DOTALL)
            freq, = page.get_by_regex_in_rect(ur".*FREQUENCY.*",
                                              desig.x2, desig.y1, 100, desig.y2,
                                              re.DOTALL)
            #print "Looking in ",desig.y2+0.5,y2
            # NOTE(review): this rebinds `desigs`, shadowing the list being
            # iterated above; izip() already captured the original list, so
            # the outer loop is unaffected — but the shadowing is fragile.
            desigs = page.get_partially_in_rect(0, desig.y2 + 0.5,
                                                desig.x2 + 1, y2 - 0.8)
            #print "desigs,",repr(desigs)
            # Dead code kept from the original (unfinished helper sketch,
            # neutralized as a string literal):
            """
            def clump(desigs):
                out=[]
                y1=1e30
                y2=None
                for desig in desigs:
                    if y2!=None:
                        delta=desig.y1-y2
                        if delta>
                    y1=min(desig.y1,y1)
                    y2=max(desig.y2,y2)
                    out.append(desig.text)
            """
            #last_curfreq=None
            #out=[]
            if re.match(ur".*ATS\s*SERVICES\s*DELEGATION.*", area):
                break
            raws = []
            found_x1 = None
            for sub in desigs:
                #print "\n\n-> y2",y2," cur sub:",sub.y1
                if sub.y1 >= y2:
                    break
                wholerow = page.get_lines2(
                    page.get_partially_in_rect(0, sub.y1 + 0.25,
                                               100, sub.y2 - 0.25))
                wholerowstr = " ".join(wholerow)
                #print "Parse:<%s>"%(wholerowstr,)
                if re.match(ur".*\d+\.\d+\s+[\w\s*]+CONTROL\s*AREA\s*$",
                            wholerowstr, re.UNICODE):
                    break
                if re.match(ur".*\d+\s+ATS\s*SERVICES\s*DELEGATION.*",
                            wholerowstr, re.UNICODE):
                    break
def ep_parse_tma(): spaces=[] pages,date=miner.parse('/_Poland_EP_ENR_2_1_en.pdf', country='ep',usecache=True, maxcacheage=86400*7 ) for nr,page in enumerate(pages): #if nr!=1: continue #print "page",nr #print page.items desigs=page.get_by_regex(ur".*DESIGNATION AND LATERAL.*",re.DOTALL) for desig,next in izip(desigs,desigs[1:]+[None]): if nr==0: #FIR uwagi=page.get_by_regex_in_rect(ur".*UWAGI\s*/\s*REMARKS.*", 0,desig.y2,100,100,re.DOTALL)[0] coords=page.get_lines2(page.get_partially_in_rect( 0,desig.y2+0.5,desig.x2+10,uwagi.y1-0.5)) raw="\n".join(coords) #print "Raw:\n",raw d=md5.md5(raw.encode('utf8')).hexdigest() assert d=="f336800a8183f1360415d2afef38e9ae" #print "Md5-digest",d #/further along the state border to the point 54°36’14.03”N 019°24’15.02”E - raw=fixup(u""" 54°27’28.03”N 019°38’24.05”E - 54°36’14.03”N 019°24’15.02”E - 55°50’58.98”N 017°32’52.80”E - 54°54’58.84”N 015°51’52.92”E - 54°55’00.00”N 015°08’07.00”E - /from this point the arc of 30 km radius centred at point 55°04’04”N 014°44’48”E - 54°55’00”N 014°21’27”E - 54°07’38”N 014°15’17”E - 54°07’34”N 014°12’05”E - 53°59’16”N 014°14’32”E - 53°55’40”N 014°13’34”E - <hack_longway_around_border>/further along the state border to the point 542615N 0194751E """) ##print "rw:",raw fir=mapper.parse_coord_str(raw,context='poland') fir_context=[fir]#In principle, a FIR could consist of multiple non-overlapping regions. In this case, the list here would contain more than one list of points #print fir #sys.exit(1) spaces.append( dict( points=fir, name="WARSZAWA FIR", icao="EPWW", floor="GND", ceiling="-", freqs=[], type="FIR", date=date )) continue areas=page.get_partially_in_rect(50,desig.y1-3,100,desig.y1-0.5) #print "partially: <%s>"%(areas,) if len(areas)==0: #print "Found continuation of area:",area pass else: lines=[] for s in reversed(page.get_lines2(areas)): if s.y1>=desig.y1: break if re.match("\d+ \w{3} 2[01]\d{2}",s): break if re.match(ur"\s*AIP\s*POLAND\s*",s): #not real area. 
break if s.count("Responsibility boundary within SECTOR"): lines=[] #not real area name break m=re.match(".*\d+\.?\d*\s*([\w\s()]+)\s*$",s,re.UNICODE) if m: print "matched name",s,"as: <%s>"%(m.groups()) lines=[m.groups()[0]] break lines.append(s.strip()) if len(lines)==0: pass #print "Continuation of area:",area else: area=" ".join(lines) print "areastr:",area print "Parsing area\n-------------------------------------------------\n\n",area uwagis=page.get_by_regex_in_rect(ur".*UWAGI/REMARKS.*", 0,desig.y2+1,100,100,re.DOTALL) y2=100 if len(uwagis): #print "Uwagi y1:",uwagis[0].y1 y2=min(uwagis[0].y1-0.1,y2) if next: y2=min(next.y1,y2) #print "next.y1",next.y1 #print "End of desig",y2 #print desig units=page.get_by_regex_in_rect(ur".*UNIT PROVIDING.*", desig.x2,desig.y1,100,desig.y2,re.DOTALL) if len(units)==0: continue unit,=units vertlim,=page.get_by_regex_in_rect(ur".*VERTICAL LIMITS.*", desig.x2,desig.y1,100,desig.y2,re.DOTALL) freq,=page.get_by_regex_in_rect(ur".*FREQUENCY.*", desig.x2,desig.y1,100,desig.y2,re.DOTALL) #print "Looking in ",desig.y2+0.5,y2 desigs=page.get_partially_in_rect(0,desig.y2+0.5,desig.x2+1,y2-0.8) #print "desigs,",repr(desigs) """ def clump(desigs): out=[] y1=1e30 y2=None for desig in desigs: if y2!=None: delta=desig.y1-y2 if delta> y1=min(desig.y1,y1) y2=max(desig.y2,y2) out.append(desig.text) """ #last_curfreq=None #out=[] if re.match(ur".*ATS\s*SERVICES\s*DELEGATION.*",area): break raws=[] found_x1=None for sub in desigs: #print "\n\n-> y2",y2," cur sub:",sub.y1 if sub.y1>=y2: break wholerow=page.get_lines2(page.get_partially_in_rect(0,sub.y1+0.25,100,sub.y2-0.25)) wholerowstr=" ".join(wholerow) #print "Parse:<%s>"%(wholerowstr,) if re.match(ur".*\d+\.\d+\s+[\w\s*]+CONTROL\s*AREA\s*$",wholerowstr,re.UNICODE): break if re.match(ur".*\d+\s+ATS\s*SERVICES\s*DELEGATION.*",wholerowstr,re.UNICODE): break