Python get_cached_url 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: nlptools.urlcache

메소드/함수: get_cached_url

hotexamples.com에서의 예제들: 8

Python get_cached_url - 예제 8개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python nlptools.urlcache.get_cached_url의 실제 사용 예제 중 가장 높은 평가를 받은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

def get_context(url, matchtext, before, after):
    """Return the text surrounding the first occurrence of ``matchtext``.

    The page at ``url`` is read through ``get_cached_url``, converted with
    ``html_to_text``, and searched with ``find``.  The window starts up to
    ``before`` characters ahead of the match start and ends ``after``
    characters past the match start (clamped to the text bounds).  The
    window is passed through ``trim_to_words`` before being returned.

    When ``matchtext`` does not occur, an empty slice of the text is
    trimmed and returned instead of a window.
    """
    html = get_cached_url(url).read()
    textsegments = html_to_text(html)
    i = textsegments.find(matchtext)
    if i < 0:
        # find() reports "not found" as -1 (assuming str-like semantics).
        # Slicing from that index used to yield an unrelated prefix of the
        # document (or, with after == 0, everything but the last character),
        # so hand back an empty context instead.
        return trim_to_words(textsegments[:0])
    window_start = max(0, i - before)
    window_end = min(i + after, len(textsegments))
    return trim_to_words(textsegments[window_start:window_end])

예제 #2

파일 보기

def get_boss(query, start=0, count=10):
    """Fetch one page of results for ``query`` via the URL cache.

    The request URL comes from ``get_boss_url(query, start, count)`` and is
    loaded with ``uc.get_cached_url("boss", ..., pause=True)``; the response
    is wrapped in ``XML``.

    Returns ``dom.findAll("result")`` when the "start" attribute of the
    "resultset_web" element equals ``start``; otherwise returns ``None``.
    """
    request_url = get_boss_url(query, start, count)
    response = XML(uc.get_cached_url("boss", request_url, pause=True))
    reported_start = int(response.find("resultset_web").attr("start"))
    # Guard clause: the response describes a different offset than requested.
    if reported_start != start:
        return None
    return response.findAll("result")

예제 #3

파일 보기

파일: boss.py 프로젝트: milliondreams/think-link

def get_boss(query,start=0,count=10):
	"""Fetch one page of results for ``query`` via the URL cache.

	The request URL comes from ``get_boss_url(query, start, count)`` and is
	loaded with ``uc.get_cached_url("boss", ...)``; the response is wrapped
	in ``XML``.

	Returns ``dom.findAll("result")`` when the "start" attribute of the
	"resultset_web" element equals ``start``; otherwise returns ``None``.
	"""
	request_url = get_boss_url(query, start, count)
	response = XML(uc.get_cached_url("boss", request_url))
	reported_start = int(response.find("resultset_web").attr("start"))
	# Guard clause: the response describes a different offset than requested.
	if reported_start != start:
		return None
	return response.findAll("result")

예제 #4

파일 보기

파일: download_raw.py 프로젝트: BenTrem/think-link

	def run(self):
		global totaldownloaded
		global totalfiles
		global urls
		global timeouts 
		print "thread running"
		while len(urls) > 0:
			url = urls.pop()
			if url.endswith("pdf"): continue
			try:
				content = uc.get_cached_url("pages",url,400000,2).read()			
				totaldownloaded += len(content)
				totalfiles += 1
				if totalfiles % 10 == 0:
					print "size:",len(content),"avg:",(totaldownloaded/totalfiles),"tot:",totaldownloaded,"cnt:",totalfiles,"tmo:",timeouts,"url:",url[:50]
			except:	
				timeouts += 1

예제 #5

파일 보기

파일: download_raw.py 프로젝트: BenTrem/think-link

 def run(self):
     global totaldownloaded
     global totalfiles
     global urls
     global timeouts
     print "thread running"
     while len(urls) > 0:
         url = urls.pop()
         if url.endswith("pdf"): continue
         try:
             content = uc.get_cached_url("pages", url, 400000, 2).read()
             totaldownloaded += len(content)
             totalfiles += 1
             if totalfiles % 10 == 0:
                 print "size:", len(content), "avg:", (
                     totaldownloaded / totalfiles
                 ), "tot:", totaldownloaded, "cnt:", totalfiles, "tmo:", timeouts, "url:", url[:
                                                                                               50]
         except:
             timeouts += 1

예제 #6

파일 보기

파일: features.py 프로젝트: BenTrem/think-link

def get_context(url,matchtext,before,after):
	"""Return the text surrounding the first occurrence of ``matchtext``.

	The page at ``url`` is read through ``get_cached_url``, converted with
	``html_to_text``, and searched with ``find``.  The window starts up to
	``before`` characters ahead of the match start and ends ``after``
	characters past the match start (clamped to the text bounds).  The
	window is passed through ``trim_to_words`` before being returned.

	When ``matchtext`` does not occur, an empty slice of the text is
	trimmed and returned instead of a window.
	"""
	html = get_cached_url(url).read()
	textsegments = html_to_text(html)
	i = textsegments.find(matchtext)
	if i < 0:
		# find() reports "not found" as -1 (assuming str-like semantics).
		# Slicing from that index used to yield an unrelated prefix of the
		# document (or, with after == 0, everything but the last character),
		# so hand back an empty context instead.
		return trim_to_words(textsegments[:0])
	window_start = max(0, i - before)
	window_end = min(i + after, len(textsegments))
	return trim_to_words(textsegments[window_start:window_end])

예제 #7

파일 보기

def boss_counts_for_pattern(pattern):
    """Return the "totalhits" count reported for ``pattern`` as an int.

    Requests offset 0 with a count of 50 through ``boss.get_boss_url`` and
    loads the URL via ``uc.get_cached_url("boss", ...)``, so the first 50
    results are fetched as a side effect of the lookup.
    """
    first_page_url = boss.get_boss_url(pattern, 0, 50)
    response = XML(uc.get_cached_url("boss", first_page_url))
    resultset = response.find("resultset_web")
    return int(resultset.attr("totalhits"))

예제 #8

파일 보기

파일: regexpatterns.py 프로젝트: milliondreams/think-link

def boss_counts_for_pattern(pattern):
	"""Return the "deephits" count reported for the quoted ``pattern``.

	The pattern is wrapped in double quotes before the URL is built with
	``boss.get_boss_url`` (offset 0, count 50).  The URL is loaded via
	``uc.get_cached_url("boss", ...)``, so the first 50 results are fetched
	as a side effect of the lookup.
	"""
	quoted_pattern = '"' + pattern + '"'
	first_page_url = boss.get_boss_url(quoted_pattern, 0, 50)
	response = XML(uc.get_cached_url("boss", first_page_url))
	resultset = response.find("resultset_web")
	return int(resultset.attr("deephits"))