# Example #1
def ev_parse_airfields():
    """Scrape the Latvian (EV) eAIP and collect airfield/airspace data.

    For each of four hard-coded Latvian ICAO codes, fetches the AD-2 HTML
    page for the current AIRAC cycle and parses out: aerodrome name, ARP
    position, elevation, CTR/TIZ name + lateral/vertical limits, and COM
    frequencies.

    NOTE(review): Python 2 source (print statements, ur"" literals).  The
    function is truncated in this view — the accumulators below are filled
    but no return statement is visible here.
    """
    ads=[]      # aerodrome dicts (appended further down, outside this view)
    spaces=[]   # airspace dicts
    seen=set()
    cur_airac=get_cur_airac()
    assert cur_airac  # AIRAC id is interpolated into every URL below
    for icao in ["EVRA",
                "EVLA",
                "EVTJ",
                "EVVA"]:
        thrs=[]  # runway-threshold dicts for this airfield
        url="/eAIPfiles/%s-AIRAC/html/eAIP/EV-AD-2.%s-en-GB.html"%(cur_airac,icao)
        data,date=fetchdata.getdata(url,country='ev')
        parser=lxml.html.HTMLParser()
        parser.feed(data)
        tree=parser.close()
        # Per-airfield parse state; each field is filled at most once below.
        elev=None
        pos=None
        ctrarea=None
        ctr=None       # flag: lateral-limits row already consumed
        ctralt=None    # flag: vertical-limits row already consumed
        ctrname=None
        adcharturl=None
        adchart=None
        # Exactly one ADName paragraph is expected per page.
        adnametag,=tree.xpath("//p[@class='ADName']")
        adnamestr=alltext(adnametag)
        print adnamestr
        # Header reads "<ICAO> - <Name>" (hyphen or em-dash).
        name,=re.match(ur"%s\s*[-—]\s*([\w\s]+)"%(icao,),adnamestr,re.UNICODE).groups()
        freqs=[]  # (designation+name, MHz) tuples
        for table in tree.xpath("//table"):
            rows=list(table.xpath(".//tr"))
            
            headings=list(table.xpath(".//th"))
            
            # Frequency tables are recognised by their 5-column header row.
            if len(headings)==5:
                # NOTE(review): headings[2] is an lxml element, not a string,
                # so this comparison looks like it is always False — probably
                # alltext(headings[2])=="Frequency" was intended.  Confirm.
                if headings[2]=="Frequency":
                    for row in rows:
                        # NOTE(review): xpath is run on `table`, not `row`, so
                        # every iteration sees the whole table's cells —
                        # presumably row.xpath was intended.  Confirm.
                        cols=alltexts(table.xpath(".//td"))
                        desig,name=cols[0:2]
                        # NOTE(review): this pattern has no capture group, yet
                        # .groups() is unpacked into one value — would raise
                        # if this branch were ever reached.
                        freq,=re.match(ur"\d{3}\.\d{3}\s*MHz",cols[2]).groups()
                        # 121.500 is the emergency frequency; skip it.
                        if freq!="121.500":
                            freqs.append((desig+" "+name,float(freq)))                        
                        
                    continue
                
            
            # Generic key/value tables: column 1 is a label, column 2 a value.
            for row in rows:
                cols=alltexts(row.xpath(".//td"))
                print "cols:",repr(cols)
                if len(cols)<2: continue
                if not pos and re.match(ur".*ARP\s*coordinates.*",cols[1]):
                    pos,=mapper.parsecoords(cols[2])
                if not elev and re.match(ur"Elevation.*",cols[1]):
                    elev,=re.match(ur"(\d+) FT.*",cols[2]).groups()
                
                if not ctr and re.match(ur"Designation\s*and\s*lateral\s*limits",cols[1]):
                    lines=cols[2].split("\n")
                    ctr=True
                    print "Got lateral limits",lines[0]
                    # The zone name and type are usually on the first line; if
                    # the match fails (name wrapped onto two lines), retry with
                    # the first two lines joined.
                    try:
                        ctrname,type_=re.match(ur"^([\w\s]+)(CTR|TIZ)",lines[0]).groups()
                        ctrarea=" ".join(lines[1:])
                    except:
                        ctrname,type_=re.match(ur"^([\w\s]+)(CTR|TIZ)",lines[0]+lines[1]).groups()
                        ctrarea=" ".join(lines[2:])
                    assert ctrname.strip()
                    ctrname=ctrname.strip()+" "+type_
                    
                #print ".",cols[1],"."
                if not ctralt and re.match(ur".*Vertical\s*limits.*",cols[1],re.UNICODE):
                    ctralt=True
                    #print "<",cols[2],">"
                    # Limits are "ceiling/floor"; a single value means the
                    # floor is ground level.
                    alts=cols[2].split("/")
                    if len(alts)==1:                    
                        ceiling=alts[0]
                        floor="GND"
                    else:
                        ceiling,floor=alts
                    print "Parsed",ceiling,floor
# Example #2
def ee_parse_airfields2():
    """Scrape the Estonian (EE) eAIP and collect airfield/airspace data.

    Reads the AD-0.6 overview page to discover ICAO codes (skipping
    heliports), then for each aerodrome parses its AD-2 page for: name,
    ARP position, CTR/TIZ/FIZ lateral and vertical limits, charts, runway
    thresholds and ATS COM frequencies.

    NOTE(review): Python 2 source.  Truncated in this view — `ads` and
    `spaces` are filled but no return statement is visible here.
    """
    ads = []     # aerodrome dicts
    spaces = []  # airspace dicts
    airac_date = get_airac_date()
    print "airac", airac_date
    overview_url = "/%s/html/eAIP/EE-AD-0.6-en-GB.html" % (airac_date, )

    parser = lxml.html.HTMLParser()
    data, date = fetchdata.getdata(overview_url, country='ee')
    parser.feed(data)
    tree = parser.close()
    icaos = []
    # Discover aerodrome ICAO codes from the overview page's h3 links.
    for cand in tree.xpath(".//h3"):
        txts = alltexts(cand.xpath(".//a"))
        aps = re.findall(r"EE[A-Z]{2}", " ".join(txts))
        if aps:
            icao, = aps  # expect exactly one code per heading
            if alltext(cand).count("HELIPORT"):
                print "Ignore heliport", icao
                continue
            icaos.append(icao)

    for icao in icaos:
        ad = dict(icao=icao)
        url = "/%s/html/eAIP/EE-AD-2.%s-en-GB.html" % (airac_date, icao)
        data, date = fetchdata.getdata(url, country='ee')
        # NOTE(review): the parser instance from the overview fetch is reused
        # (fed and closed again) on every iteration — confirm lxml's
        # feed-parser supports reuse after close().
        parser.feed(data)
        tree = parser.close()
        thrs = []  # runway-threshold dicts for this airfield

        # Aerodrome name: heading of form "<ICAO> — <Name>".
        for h3 in tree.xpath(".//h3"):
            txt = alltext(h3)
            print repr(txt)
            ptrn = ur"\s*%s\s+[—-]\s+(.*)" % (unicode(icao.upper()), )
            m = re.match(ptrn, txt, re.UNICODE)
            if m:
                assert not 'name' in ad  # name must match exactly one heading
                ad['name'] = m.groups()[0]

        # ARP position: row mentioning "coordinates and site" with DDMMSS
        # lat / DDDMMSS lon.
        for tr in tree.xpath(".//tr"):
            txt = alltext(tr)
            m = re.match(r".*coordinates\s*and\s*site.*(\d{6}N\s*\d{7}E).*",
                         txt)
            #print "Matching,",txt,":",m
            if m:
                crds, = m.groups()
                ad['pos'] = mapper.anyparse(crds)

        # Controlled/information zone around the aerodrome, if any.
        space = dict()
        for table in tree.xpath(".//table"):
            for tr in table.xpath(".//tr"):
                trtxt = alltext(tr)
                if trtxt.count("Designation and lateral limits"):
                    space = dict()  # start over: only the last such row wins
                    coords = tr.getchildren()[2]
                    lines = alltext(coords).split("\n")
                    if lines[0].strip() == 'NIL':
                        continue  # no zone published

                    # First line: "<zone name> <CTR|TIZ|FIZ> [spill-over
                    # coordinate text]"; remaining lines are the boundary.
                    zname, what, spill = re.match(ur"(.*)\s+(CTR|TIZ|FIZ)(.*)",
                                                  lines[0]).groups()
                    if spill and spill.strip():
                        rest = [spill] + lines[1:]
                    else:
                        rest = lines[1:]
                    what = what.strip()
                    # Sanity: the zone name must be a substring of the AD name.
                    assert ad['name'].upper().strip().count(
                        zname.upper().strip())
                    assert what in ['FIZ', 'TIZ', 'CTR']
                    space['type'] = what
                    space['points'] = mapper.parse_coord_str("\n".join(rest))

                    space['name'] = zname + " " + what
                    space['date'] = date
                    space['url'] = fetchdata.getrawurl(url, 'ee')

                if trtxt.count("Vertical limits"):
                    vlim = alltext(tr.getchildren()[2])
                    if vlim.strip() == 'NIL': continue
                    # Assumes text reads "<floor> to <ceiling>" — TODO confirm
                    # the order against a live page.
                    space['floor'], space['ceiling'] = vlim.split(" to ")

                #space['freqs']=x

        # Charts: scan the "charts" section's tables for PDF links and hand
        # them to the landing-chart helper.
        #hlc=False
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.lower().count("charts"):
                par = h4.getparent()
                for table in par.xpath(".//table"):
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        name,page=\
                            tr.getchildren()
                        nametxt = alltext(name)
                        print "nametxt:", nametxt, "link:"
                        # Map chart-name patterns to chart variants.
                        for reg, variant in [
                            (r"Aerodrome.*Chart.*", ""),
                            (r"Landing.*Chart.*", "landing"),
                            (r".*Parking.*Chart.*", "parking"),
                            (r".*Visual.*Approach.*|.*\bVAC\b.*", "vac")
                        ]:
                            if re.match(reg, nametxt):
                                for a in page.xpath(".//a"):
                                    print "linklabel", a.text
                                    print "attrib:", a.attrib
                                    href = a.attrib['href']
                                    print "Bef repl", href
                                    if href.lower().endswith("pdf"):
                                        # Rewrite relative graphics path to the
                                        # AIRAC-dated absolute path.
                                        href = href.replace(
                                            "../../graphics",
                                            "/%s/graphics" % (airac_date, ))
                                        print "href:", href, airac_date
                                        assert href
                                        parse_landing_chart.help_plc(
                                            ad,
                                            href,
                                            icao,
                                            ad['pos'],
                                            "ee",
                                            variant=variant)
                                        """arp=ad['pos']
                                        lc=parse_landing_chart.parse_landing_chart(
                                                href,
                                                icao=icao,
                                                arppos=arp,country="ee")
                                        assert lc
                                        if lc:
                                            ad['adcharturl']=lc['url']
                                            ad['adchart']=lc
                                            hlc=True
                                            #chartblobnames.append(lc['blobname'])
                                        """
        #assert hlc
        # Runway thresholds from the "RUNWAY PHYSICAL" table.
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.count("RUNWAY PHYSICAL"):
                par = h4.getparent()

                for table in par.xpath(".//table"):
                    prevnametxt = ""
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        if idx == 0:
                            fc = alltext(tr.getchildren()[0])
                            print "FC", fc
                            if not fc.count("Designations"):
                                break  #skip table
                        if idx < 2: continue  # skip the two header rows
                        if len(tr.getchildren()) == 1: continue  # spacer row
                        print "c:", tr.getchildren(), alltexts(
                            tr.getchildren())
                        desig, trubrg, dims, strength, thrcoord, threlev = tr.getchildren(
                        )
                        rwy = re.match(r"(\d{2}[LRC]?)", alltext(desig))
                        altc = alltext(thrcoord)
                        print "Matching", altc
                        print "rwymatch:", alltext(desig)
                        # Threshold coordinates: decimal "N" lat then "E" lon.
                        m = re.match(r"\s*(\d+\.?\d*N)[\s\n]*(\d+\.?\d*E).*",
                                     altc, re.DOTALL | re.MULTILINE)
                        if m:
                            lat, lon = m.groups()
                            print "Got latlon", lat, lon
                            thrs.append(
                                dict(pos=mapper.parse_coords(lat, lon),
                                     thr=rwy.groups()[0]))

        # COM frequencies.  NOTE(review): this initialises 'freqs' on `space`
        # even when no lateral-limits row was ever found for this aerodrome.
        space['freqs'] = []
        for h4 in tree.xpath(".//h4"):
            txt = alltext(h4)
            if txt.count("ATS COMMUNICATION"):
                par = h4.getparent()
                for table in par.xpath(".//table"):
                    for idx, tr in enumerate(table.xpath(".//tr")):
                        print "cs", repr(tr.getchildren()), alltexts(
                            tr.getchildren())
                        print len(tr.getchildren())
                        if len(tr.getchildren()) != 5:
                            if "".join(alltexts(
                                    tr.getchildren())).count(u"EMERG"):
                                continue  #Sometimes emergency freq is listed, and then it is without callsign
                        service,callsign,frequency,hours,remarks=\
                            tr.getchildren()
                        callsigntxt = alltext(callsign)
                        # Rows 0 and 1 are header rows; verify their shape.
                        if idx < 2:
                            if idx == 0:
                                assert callsigntxt.strip() == "Call sign"
                            if idx == 1:
                                assert callsigntxt.strip() == "2"
                            continue
                        ftext = alltext(frequency)
                        print "matching freq", ftext
                        for freq in re.findall(ur"\b\d{3}\.\d{1,3}", ftext):
                            freqmhz = float(freq)
                            space['freqs'].append(
                                (callsigntxt.strip(), freqmhz))

        # Only keep the space if a boundary was actually parsed; then all
        # other fields are mandatory.
        if space and 'points' in space:
            assert 'freqs' in space
            assert 'points' in space
            assert 'floor' in space
            assert 'ceiling' in space
            assert 'type' in space
            spaces.append(space)
        if thrs:
            ad['runways'] = rwy_constructor.get_rwys(thrs)

        # Attach the textual AIP document for this aerodrome.
        aip_text_documents.help_parse_doc(ad,
                                          url,
                                          icao,
                                          "ee",
                                          title="General Information",
                                          category="general")

        ad['date'] = date
        ad['url'] = fetchdata.getrawurl(url, 'ee')
        print "AD:", ad
        assert 'pos' in ad
        assert 'name' in ad
        ads.append(ad)
# Example #3
        # Fragment: interior of a larger parsing function (its def/header is
        # outside this view; `tree`, `thrs` and the helpers are defined there).
        # Extracts runway thresholds from the "RUNWAY PHYSICAL" table: for each
        # data row, reads the designator and threshold coordinates and appends
        # a dict(pos=..., thr=...) to `thrs`.
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.count("RUNWAY PHYSICAL"):
                par=h4.getparent()

                for table in par.xpath(".//table"):
                    prevnametxt=""
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        if idx==0:
                            fc=alltext(tr.getchildren()[0])
                            print "FC",fc
                            if not fc.count("Designations"):
                                break #skip table
                        if idx<2:continue  # skip the two header rows
                        if len(tr.getchildren())==1:continue  # spacer row
                        print "c:",tr.getchildren(),alltexts(tr.getchildren())
                        desig,trubrg,dims,strength,thrcoord,threlev=tr.getchildren()
                        rwy=re.match(r"(\d{2}[LRC]?)",alltext(desig))
                        altc=alltext(thrcoord)
                        print "Matching",altc
                        print "rwymatch:",alltext(desig)
                        # Threshold coordinates: decimal "N" lat then "E" lon.
                        m=re.match(r"\s*(\d+\.?\d*N)[\s\n]*(\d+\.?\d*E).*",altc,re.DOTALL|re.MULTILINE)                        
                        if m:
                            lat,lon=m.groups()
                            print "Got latlon",lat,lon
                            thrs.append(dict(pos=mapper.parse_coords(lat,lon),thr=rwy.groups()[0]))         
                        
        addummy=dict()
        
        # Truncated: the body of this loop is not visible in this view.
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
# Example #4
def ee_parse_airfields2():
    """Scrape the Estonian (EE) eAIP and collect airfield/airspace data.

    Unformatted duplicate of the ee_parse_airfields2 example earlier in
    this file: discovers ICAO codes from the AD-0.6 overview (skipping
    heliports), then parses each AD-2 page for name, ARP position,
    CTR/TIZ/FIZ limits, charts, runway thresholds and COM frequencies.

    NOTE(review): Python 2 source.  Truncated in this view — `ads` and
    `spaces` are filled but no return statement is visible here.
    """
    ads=[]     # aerodrome dicts
    spaces=[]  # airspace dicts
    airac_date=get_airac_date()
    print "airac",airac_date
    overview_url="/%s/html/eAIP/EE-AD-0.6-en-GB.html"%(airac_date,)
        
    parser=lxml.html.HTMLParser()
    data,date=fetchdata.getdata(overview_url,country='ee')
    parser.feed(data)
    tree=parser.close()
    icaos=[]
    # Discover aerodrome ICAO codes from the overview page's h3 links.
    for cand in tree.xpath(".//h3"):
        txts=alltexts(cand.xpath(".//a"))
        aps=re.findall(r"EE[A-Z]{2}"," ".join(txts))
        if aps:
            icao,=aps  # expect exactly one code per heading
            if alltext(cand).count("HELIPORT"):
                print "Ignore heliport",icao
                continue
            icaos.append(icao)
    
    for icao in icaos:
        ad=dict(icao=icao)
        url="/%s/html/eAIP/EE-AD-2.%s-en-GB.html"%(airac_date,icao)
        data,date=fetchdata.getdata(url,country='ee')
        # NOTE(review): the parser instance from the overview fetch is reused
        # (fed and closed again) on every iteration — confirm lxml's
        # feed-parser supports reuse after close().
        parser.feed(data)
        tree=parser.close()
        thrs=[]  # runway-threshold dicts for this airfield


        
        # Aerodrome name: heading of form "<ICAO> — <Name>".
        for h3 in tree.xpath(".//h3"):
            txt=alltext(h3)
            print repr(txt)
            ptrn=ur"\s*%s\s+[—-]\s+(.*)"%(unicode(icao.upper()),)
            m=re.match(ptrn,txt,re.UNICODE)
            if m:
                assert not 'name' in ad  # name must match exactly one heading
                ad['name']=m.groups()[0]
                
        # ARP position: row mentioning "coordinates and site" with DDMMSS
        # lat / DDDMMSS lon.
        for tr in tree.xpath(".//tr"):
            txt=alltext(tr)
            m=re.match(r".*coordinates\s*and\s*site.*(\d{6}N\s*\d{7}E).*",txt)
            #print "Matching,",txt,":",m 
            if m:
                crds,=m.groups()
                ad['pos']=mapper.anyparse(crds)
                
        # Controlled/information zone around the aerodrome, if any.
        space=dict()
        for table in tree.xpath(".//table"):
            for tr in table.xpath(".//tr"):
                trtxt=alltext(tr)
                if trtxt.count("Designation and lateral limits"):
                    space=dict()  # start over: only the last such row wins
                    coords=tr.getchildren()[2]
                    lines=alltext(coords).split("\n")
                    if lines[0].strip()=='NIL':
                        continue  # no zone published
                    
                    
                    # First line: "<zone name> <CTR|TIZ|FIZ> [spill-over
                    # coordinate text]"; remaining lines are the boundary.
                    zname,what,spill=re.match(ur"(.*)\s+(CTR|TIZ|FIZ)(.*)",lines[0]).groups()
                    if spill and spill.strip():
                        rest=[spill]+lines[1:]
                    else:
                        rest=lines[1:]
                    what=what.strip()
                    # Sanity: the zone name must be a substring of the AD name.
                    assert ad['name'].upper().strip().count(zname.upper().strip())
                    assert what in ['FIZ','TIZ','CTR']
                    space['type']=what
                    space['points']=mapper.parse_coord_str("\n".join(rest))

                    space['name']=zname+" "+what
                    space['date']=date
                    space['url']=fetchdata.getrawurl(url,'ee')
                 
                    
                if trtxt.count("Vertical limits"):
                    vlim=alltext(tr.getchildren()[2])
                    if vlim.strip()=='NIL': continue
                    # Assumes text reads "<floor> to <ceiling>" — TODO confirm
                    # the order against a live page.
                    space['floor'],space['ceiling']=vlim.split(" to ")
                    
                #space['freqs']=x
                
        # Charts: scan the "charts" section's tables for PDF links and hand
        # them to the landing-chart helper.
        #hlc=False
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.lower().count("charts"):
                par=h4.getparent()
                for table in par.xpath(".//table"):
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        name,page=\
                            tr.getchildren()
                        nametxt=alltext(name)
                        print "nametxt:",nametxt,"link:"
                        # Map chart-name patterns to chart variants.
                        for reg,variant in [
                                           (r"Aerodrome.*Chart.*","") ,
                                           (r"Landing.*Chart.*","landing"), 
                                           (r".*Parking.*Chart.*","parking"), 
                                           (r".*Visual.*Approach.*|.*\bVAC\b.*","vac")
                                            ]:
                            if re.match(reg,nametxt):
                                for a in page.xpath(".//a"):
                                    print "linklabel",a.text
                                    print "attrib:",a.attrib
                                    href=a.attrib['href']
                                    print "Bef repl",href
                                    if href.lower().endswith("pdf"):
                                        # Rewrite relative graphics path to the
                                        # AIRAC-dated absolute path.
                                        href=href.replace("../../graphics","/%s/graphics"%(airac_date,))
                                        print "href:",href,airac_date
                                        assert href
                                        parse_landing_chart.help_plc(ad,href,
                                                        icao,ad['pos'],"ee",variant=variant)
                                        """arp=ad['pos']
                                        lc=parse_landing_chart.parse_landing_chart(
                                                href,
                                                icao=icao,
                                                arppos=arp,country="ee")
                                        assert lc
                                        if lc:
                                            ad['adcharturl']=lc['url']
                                            ad['adchart']=lc
                                            hlc=True
                                            #chartblobnames.append(lc['blobname'])
                                        """                                                    
        #assert hlc
        # Runway thresholds from the "RUNWAY PHYSICAL" table.
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.count("RUNWAY PHYSICAL"):
                par=h4.getparent()

                for table in par.xpath(".//table"):
                    prevnametxt=""
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        if idx==0:
                            fc=alltext(tr.getchildren()[0])
                            print "FC",fc
                            if not fc.count("Designations"):
                                break #skip table
                        if idx<2:continue  # skip the two header rows
                        if len(tr.getchildren())==1:continue  # spacer row
                        print "c:",tr.getchildren(),alltexts(tr.getchildren())
                        desig,trubrg,dims,strength,thrcoord,threlev=tr.getchildren()
                        rwy=re.match(r"(\d{2}[LRC]?)",alltext(desig))
                        altc=alltext(thrcoord)
                        print "Matching",altc
                        print "rwymatch:",alltext(desig)
                        # Threshold coordinates: decimal "N" lat then "E" lon.
                        m=re.match(r"\s*(\d+\.?\d*N)[\s\n]*(\d+\.?\d*E).*",altc,re.DOTALL|re.MULTILINE)                        
                        if m:
                            lat,lon=m.groups()
                            print "Got latlon",lat,lon
                            thrs.append(dict(pos=mapper.parse_coords(lat,lon),thr=rwy.groups()[0]))         
                        
                                
        # COM frequencies.  NOTE(review): this initialises 'freqs' on `space`
        # even when no lateral-limits row was ever found for this aerodrome.
        space['freqs']=[]
        for h4 in tree.xpath(".//h4"):
            txt=alltext(h4)
            if txt.count("ATS COMMUNICATION"):
                par=h4.getparent()
                for table in par.xpath(".//table"):
                    for idx,tr in enumerate(table.xpath(".//tr")):
                        print "cs",repr(tr.getchildren()),alltexts(tr.getchildren())
                        print len(tr.getchildren())
                        if len(tr.getchildren())!=5:
                            if "".join(alltexts(tr.getchildren())).count(u"EMERG"):
                                continue #Sometimes emergency freq is listed, and then it is without callsign
                        service,callsign,frequency,hours,remarks=\
                            tr.getchildren()
                        callsigntxt=alltext(callsign)
                        # Rows 0 and 1 are header rows; verify their shape.
                        if idx<2:
                            if idx==0:
                                assert callsigntxt.strip()=="Call sign"
                            if idx==1:
                                 assert callsigntxt.strip()=="2"
                            continue
                        ftext=alltext(frequency)
                        print "matching freq",ftext
                        for freq in re.findall(ur"\b\d{3}\.\d{1,3}",ftext):
                            freqmhz=float(freq)                            
                            space['freqs'].append((callsigntxt.strip(),freqmhz))
                              
        # Only keep the space if a boundary was actually parsed; then all
        # other fields are mandatory.
        if space and 'points' in space:
            assert 'freqs' in space
            assert 'points' in space
            assert 'floor' in space
            assert 'ceiling' in space
            assert 'type' in space
            spaces.append(space)
        if thrs:
            ad['runways']=rwy_constructor.get_rwys(thrs)
            
        # Attach the textual AIP document for this aerodrome.
        aip_text_documents.help_parse_doc(ad,url,
                        icao,"ee",title="General Information",category="general")
            
        ad['date']=date
        ad['url']=fetchdata.getrawurl(url,'ee')   
        print "AD:",ad
        assert 'pos' in ad
        assert 'name' in ad
        ads.append(ad)