def parse_doc(path, icao, country, title, category):
    print "Parsing AIP doc"
    icao = icao.upper()
    assert len(icao) == 4
    url = fetchdata.getrawurl(path, country=country)
    ret = dict()
    ret['icao'] = icao
    ret['url'] = url
    ret['title'] = title
    ret['name'] = icao + " - " + title
    ret['category'] = category
    #data,nowdate=fetchdata.getdata(path,country=country,maxcacheage=7200)
    blobname = icao + "_" + category
    tmppath = os.path.join(os.getenv("SWFP_DATADIR"), "aiptext", icao)
    if not os.path.exists(tmppath):
        os.makedirs(tmppath)

    if path.lower().endswith("pdf"):
        outpath_inter = os.path.join(tmppath, blobname + ".tmp.html")

        def render(inputfile, outputfile):
            r = "pdftohtml -c -s -i -zoom 2 -noframes -nodrm %s %s" % (
                inputfile, outputfile
            )  #-s is not supported on older pdftohtml, and doesn't appear necessary either.
            print "running", r
            assert 0 == os.system(r)

        fetchdata.getcreate_derived_data_raw(path,
                                             outpath_inter,
                                             render,
                                             "html",
                                             country=country)

        whole = open(outpath_inter).read()

        fixed = (whole.replace("<BODY bgcolor=\"#A0A0A0\"",
                               "<BODY bgcolor=\"#FFFFFF\"").replace(
                                   "<TITLE>Microsoft Word - ", "<TITLE>"))

    else:
        assert path.endswith("html")
        fixed, date = fetchdata.getdata(path, country=country)

    cksum = md5.md5(fixed).hexdigest()
    outpath = os.path.join(tmppath, blobname + "." + cksum + ".html")
    f = open(outpath, "w")
    f.write(fixed)
    f.close()
    #print "Wrote raw:",out,outpath

    ret['checksum'] = cksum
    ret['date'] = fetchdata.get_filedate(outpath)
    ret['blobname'] = blobname

    return ret
# Example #2
# 0
def getsvg(path,pagenr,usecache=True):
    assert type(pagenr)==int
    inputfile=fetchdata.getdatafilename(path,country="se",maxcacheage=7200)
    
    svged=fetchdata.getcachename(path,'svg')
    if os.path.exists(svged) and usecache:
        cacheddate=fetchdata.get_filedate(svged)
        print "Cached svg version exists, date:",svged,cacheddate
        if fetchdata.is_devcomp() or datetime.now()-cacheddate<timedelta(0,86400/2):
            print "Using svg cache"
            try:
                return open(svged).read()
            except Exception,cause:
                print "Couldn't read cached svg version",cause
# Example #3
# 0
def getsvg(path, pagenr, usecache=True):
    assert type(pagenr) == int
    inputfile = fetchdata.getdatafilename(path, country="se", maxcacheage=7200)

    svged = fetchdata.getcachename(path, 'svg')
    if os.path.exists(svged) and usecache:
        cacheddate = fetchdata.get_filedate(svged)
        print "Cached svg version exists, date:", svged, cacheddate
        if fetchdata.is_devcomp() or datetime.now() - cacheddate < timedelta(
                0, 86400 / 2):
            print "Using svg cache"
            try:
                return open(svged).read()
            except Exception, cause:
                print "Couldn't read cached svg version", cause
def parse_doc(path,icao,country,title,category):
    print "Parsing AIP doc"
    icao=icao.upper()
    assert len(icao)==4
    url=fetchdata.getrawurl(path,country=country)
    ret=dict()
    ret['icao']=icao
    ret['url']=url
    ret['title']=title
    ret['name']=icao+" - "+title
    ret['category']=category
    #data,nowdate=fetchdata.getdata(path,country=country,maxcacheage=7200)
    blobname=icao+"_"+category
    tmppath=os.path.join(os.getenv("SWFP_DATADIR"),"aiptext",icao)
    if not os.path.exists(tmppath):
        os.makedirs(tmppath)
    
    if path.lower().endswith("pdf"):
        outpath_inter=os.path.join(tmppath,blobname+".tmp.html")
        def render(inputfile,outputfile):
            r="pdftohtml -c -s -i -zoom 2 -noframes -nodrm %s %s"%(inputfile,outputfile)  #-s is not supported on older pdftohtml, and doesn't appear necessary either.
            print "running",r
            assert 0==os.system(r)
                
        fetchdata.getcreate_derived_data_raw(
                    path,outpath_inter,render,"html",country=country)
        
        whole=open(outpath_inter).read()
        
        fixed=(whole.replace("<BODY bgcolor=\"#A0A0A0\"","<BODY bgcolor=\"#FFFFFF\"")
                .replace("<TITLE>Microsoft Word - ","<TITLE>"))
        
    else:
        assert path.endswith("html")
        fixed,date=fetchdata.getdata(path,country=country)
        
    cksum=md5.md5(fixed).hexdigest()
    outpath=os.path.join(tmppath,blobname+"."+cksum+".html")
    f=open(outpath,"w")
    f.write(fixed)        
    f.close()
    #print "Wrote raw:",out,outpath
        
    ret['checksum']=cksum
    ret['date']=fetchdata.get_filedate(outpath)
    ret['blobname']=blobname
    
    return ret