Python Gzzip.newOpen Examples

Programming Language: Python

Class/Type: Gzzip

Method/Function: newOpen

Examples at hotexamples.com: 3

Python Gzzip.newOpen - 3 examples found. These are the top-rated real-world Python examples of Gzzip.newOpen extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

newOpen(3)

Frequently Used Methods

newOpen (3)

Example #1

Show file

File: title.py Project: h2ero/note

def downloadImg(id,url):
    """Find a title for an image via Google search-by-image and store it.

    Fetches the search-by-image page for ``url``, extracts the URL passed to
    the page's ``initialize(...)`` call, fetches that URL, and takes the
    first ``italic">...</a`` match as the title (``'no title'`` when there
    is no match).  An ``UPDATE`` for the ``spider`` row ``id`` is then
    executed through the module-level ``cursor`` and the statement text is
    written to the module-level file ``f``.

    Also increments the module-level counter ``WHERE`` and leaves the last
    statement in the module-level ``SQLS``.

    Args:
        id: Value matched against the ``id`` column of ``spider``.
        url: Image URL appended to Google's ``image_url=`` query parameter.

    Raises:
        IndexError: If the first page contains no ``initialize(...)`` call.
    """
    global WHERE
    global SQLS
    # Counts calls; bumped before any network access happens.
    WHERE+=1
    url="http://www.google.com/searchbyimage?image_url="+url
    print(url)
    page=Gzzip.newOpen(url).read()
    found=re.findall(r'initialize\((.*?)\)',page,re.MULTILINE|re.S)
    # Make the "/search" path absolute and turn "\xNN" escapes into "%NN"
    # so that unquote() below decodes them.
    url=found[0].replace('/search','http://www.google.com/search').replace('\\x','%')
    url=urllib2.unquote(url).replace('&amp;','&')
    # Drop the first and last character (the quotes around the argument).
    url=url[1:-1]
    page=Gzzip.newOpen(url).read()
    title=re.findall(r'italic">(.*?)</a',page,re.MULTILINE|re.S)
    if not title:
        title=['no title']
    # NOTE(review): the title is scraped from a remote page and re.escape()
    # is a regex escaper, not an SQL one.  Prefer a parameterized
    # cursor.execute() once the DB driver's paramstyle is confirmed.
    SQLS="UPDATE `spider` SET `is_named`=1,`title`='"+re.escape(title[0])+"' WHERE `id`="+str(id)+";"
    try:
        print(SQLS)
        cursor.execute(SQLS)
    except Exception as e:
        # Still best effort (the statement is written to ``f`` regardless),
        # but the failure is reported instead of vanishing silently.
        print("downloadImg: statement not executed: %r" % (e,))
    f.write(SQLS)

Example #2

Show file

File: spider.py Project: h2ero/note

def pageContent(url):
    """Scrape one imgspark.com image page.

    Args:
        url: Path appended to ``http://www.imgspark.com``.

    Returns:
        A three-item list ``[image_src, source_href, tags]``:
        the ``src`` that follows ``id="lrg_image"``; the first ``href``
        inside the ``source_content`` span, or the string ``'None'`` when
        the page has none; and the list of ``title`` attribute values found
        in the ``list_tags_horizontal`` ``<ul>``.

    Raises:
        IndexError: If the image ``src`` or the tag ``<ul>`` is missing.
    """
    # NOTE(review): the result goes straight into re.findall, so newOpen is
    # assumed to return the page text here -- confirm.
    page=Gzzip.newOpen("http://www.imgspark.com"+url)
    flags=re.MULTILINE|re.S
    src=[]
    # Image source (required).
    src.append(re.findall(r'id="lrg_image" src="(.*?)"',page,flags)[0])
    # Origin link (optional): fall back to the literal string 'None'.
    origin=re.findall(r'<span id="source_content"><a href="(.*?)"',page,flags)
    src.append(origin[0] if origin else 'None')
    # Tags: title attributes inside the first tag list on the page.
    tag_lists=re.findall(r'<ul class="list_tags_horizontal">(.*?)</ul>',page,flags)
    src.append(re.findall(r'title="(.*?)"',tag_lists[0],flags))
    return src

Example #3

Show file

File: spider.py Project: h2ero/note

def listhref(id):
    """Collect the links found on one imgspark.com listing page.

    Args:
        id: Value placed as the last path segment of the listing URL
            (presumably a page number -- confirm against the caller).

    Returns:
        List of the ``href`` values captured from each
        ``<div class="image_wrap">`` block.
    """
    listing_url="".join(["http://www.imgspark.com/image/popular/all/alltime/",str(id),"/"])
    # NOTE(review): newOpen's result is passed directly to re.findall, so it
    # is assumed to be the page text -- confirm.
    page=Gzzip.newOpen(listing_url)
    wrap_re=r'<div class="image_wrap">\s+<a href="(.*?)"(?:.*?)\s+</div>'
    return re.findall(wrap_re,page,re.MULTILINE|re.S)