コード例 #1
0
import os
import socket
import sys
import urllib2
import wsgiref
import wsgiref.util

import timeout_urllib2; timeout_urllib2.sethttptimeout(4.0)
from sane_re import *

import tchelpers

# On-disk HTTP cache: make sure the directory exists before opening the
# stores that live inside it.
try:
  os.makedirs("httpcache.tc")
except OSError:
  # An already-existing directory is fine; any other failure (permissions,
  # a plain file in the way) is re-raised instead of being silently ignored.
  if not os.path.isdir("httpcache.tc"):
    raise
# Maps a fetched url to its response body (read and written by application()).
data_by_url = tchelpers.open("httpcache.tc/data_by_url.tch")
# NOTE(review): presumably url -> time of last fetch; confirm against its users.
last_update = tchelpers.open("httpcache.tc/last_update.tch")

def application(environ, start_response):
  response_headers = [('Content-type', 'text/plain')]
  #start_response(status, response_headers)

  url = wsgiref.util.request_uri(environ)
  print url
  url = _S(url)['/(http.*)', 1]
  url = url.replace('http%3A//','http://')
  print url
  if url in data_by_url:
    print "CACHE HIT"
    data = data_by_url[url]
  else:
    try:
      print "URL FETCH"
      f = urllib2.urlopen(url)
      data = f.read()
      data_by_url[url] = data
    except (urllib2.URLError, timeout_urllib2.Error, socket.error), e:
コード例 #2
0
 def __init__(self, filename="background_model.tc", readonly=True):
   """Open the n-gram count and info stores kept under directory `filename`.

   filename: directory holding ngram_counts.hdb and info.hdb. It is created
     (with any missing parents) if it does not exist, even when readonly.
   readonly: when true the stores are opened with flag 'r', otherwise 'c'.
     NOTE(review): presumably read-only vs. create/write -- confirm in tchelpers.

   Raises OSError if the directory is missing and cannot be created.
   """
   # Create the directory without going through a shell, so names containing
   # spaces or shell metacharacters are taken literally.
   try:
     os.makedirs(filename)
   except OSError:
     # Already existing is fine; re-raise any other failure.
     if not os.path.isdir(filename):
       raise
   flag = 'r' if readonly else 'c'
   self.counts = TokyoNgramProxy(tchelpers.open("%s/ngram_counts.hdb" % filename, flag))
   # NOTE(review): this store is opened via tchelpers.open_tc while counts uses
   # tchelpers.open -- confirm the difference is intentional.
   self.info = KVIntProxy(tchelpers.open_tc("%s/info.hdb" % filename, flag))