Example #1
import re

def parse_page(parser, pagenr):
    page = parser.parse_page_to_items(pagenr)
    out = []
    for areaname, coords, meta in find_areas(page):  # find_areas() is defined elsewhere in the project
        print("Found area:", areaname)
        # Skip everything that is not a TMA.
        if ("CTA" in areaname or "FIR" in areaname
                or "REF:ENR 2.2-3" in areaname or "OCA" in areaname):
            continue
        assert "TMA" in areaname

        # Non-empty text lines just below the area heading.
        lines = [
            x for x in page.get_lines(
                page.get_partially_in_rect(0, meta['y2'] + 0.5,
                                           100, meta['y2'] + 10))
            if x.strip()
        ]

        # Vertical limits: a flight level ("FL 95") and a height in feet ("1500 FT AMSL").
        alts = []
        for line in lines[:15]:
            print("Alt-parsing:", line)
            m = re.match(r"(FL \d+).*", line)
            if m:
                alts.append(m.groups()[0])
            m = re.match(r"(\d+ FT AMSL).*", line)
            if m:
                alts.append(m.groups()[0])
            if len(alts) == 2:
                break
        # FL (ceiling) is assumed to precede FT AMSL (floor); the unpack raises unless exactly two limits were found.
        ceiling, floor = alts

        # The IDENT and FREQ headers delimit the table columns.
        identh, = page.get_by_regex(r"IDENT")
        freqh, = page.get_by_regex(r"FREQ")

        callsign = " ".join(
            page.get_lines(
                page.get_partially_in_rect(identh.x1, meta['y1'] + 0.25,
                                           freqh.x1 - 2.0, meta['y2'])))
        freqlines = " ".join(
            page.get_lines(
                page.get_partially_in_rect(freqh.x1, meta['y1'],
                                           freqh.x2, meta['y2'])))

        def wanted_freq(x):
            # Drop the 121.5 emergency frequency and anything above the VHF airband.
            if abs(x - 121.5) < 1e-6:
                return False
            if x > 150.0:
                return False
            return True

        freqs = [(callsign, float(x))
                 for x in re.findall(r"\d{3}\.\d{3}", freqlines)
                 if wanted_freq(float(x))]
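
For a quick, self-contained check of the frequency extraction at the end of this example, the snippet below reuses the same regex and filter on an invented column string (the `freqlines` value here is made up for illustration):

import re

def wanted_freq(x):
    # Same filter as above: drop the 121.5 emergency frequency and
    # anything above the VHF airband.
    if abs(x - 121.5) < 1e-6:
        return False
    if x > 150.0:
        return False
    return True

freqlines = "118.255 121.500 124.100 243.000"  # made-up sample input
freqs = [float(x) for x in re.findall(r"\d{3}\.\d{3}", freqlines)
         if wanted_freq(float(x))]
print(freqs)  # [118.255, 124.1]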
Example #2
import re

def extract_single_sup(full_url, sup, supname, opening_ours):
    ads = []
    try:
        p = Parser(sup)
    except Exception:
        print("Couldn't parse", sup)
        # Some AIP SUPs contain invalid XML after conversion from PDF;
        # skip these for now.
        return []
    areas = []
    startpage = None
    for pagenr in range(p.get_num_pages()):
        page = p.parse_page_to_items(pagenr)
        # Record SUPs whose heading announces hours of operation.
        for item in page.get_by_regex(".*HOURS OF OPERATION.*"):
            lines = page.get_lines(
                page.get_partially_in_rect(0, item.y1 - 2, 100, item.y2 + 2))
            for line in lines:
                if re.match(r".*SUP\s*\d+/\d{4}\.?\s+HOURS OF OPERATION\s*$",
                            line):
                    opening_ours.add(p.get_url())
                    print("Found hours:", opening_ours)

        try:
            for areaname, coords, meta in find_areas(page):
                if areaname:
                    name = "%s (on page %d of %s)" % (areaname, pagenr + 1,
                                                      supname)
                else:
                    name = "Area on page %d of %s" % (pagenr + 1, supname)

                print("Number of points", len(coords))
                areas.append(
                    dict(url=full_url,
                         pagenr=pagenr + 1,
                         sup=supname,
                         name=name,
                         type='aip_sup',
                         points=coords))
        except Exception:
            # Area extraction is best-effort; skip pages that fail to parse.
            pass
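
To illustrate the heading regex used above, here is a small standalone test; the heading strings are made up for the demonstration, not taken from real AIP SUPs:

import re

pattern = r".*SUP\s*\d+/\d{4}\.?\s+HOURS OF OPERATION\s*$"
# Invented heading strings showing what the pattern accepts and rejects:
print(bool(re.match(pattern, "AIP SUP 12/2013. HOURS OF OPERATION")))  # True
print(bool(re.match(pattern, "SUP 7/2012 HOURS OF OPERATION")))        # True
print(bool(re.match(pattern, "HOURS OF OPERATION AND SERVICES")))      # False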