import os
import urllib.request


def download_image(url):
    # Download the image into the 'images' directory,
    # naming the file after the last path segment of the URL.
    split_list = url.split('/')
    filename = split_list.pop()
    path = os.path.join('images', filename)
    urllib.request.urlretrieve(url, filename=path)
def customer():
    # Consumer: pop URLs off the shared list under the lock and download them.
    while True:
        gLock.acquire()
        if len(FACE_URL_LIST) == 0:
            gLock.release()
            continue
        else:
            face_url = FACE_URL_LIST.pop()
            gLock.release()
            download_image(face_url)
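A minimal sketch of the shared state this consumer assumes; FACE_URL_LIST, gLock, and the thread count are not defined in the snippet above, so everything here is illustrative:

import threading

FACE_URL_LIST = []        # shared list of image URLs, filled by a producer (not shown)
gLock = threading.Lock()  # guards access to FACE_URL_LIST

# Start a few consumer threads; the count of 4 is arbitrary.
for _ in range(4):
    threading.Thread(target=customer, daemon=True).start()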
Example #3
import os
import urllib.request


def NARRdload(bdate, hr, filedir):
    # Download NARR GRIB files for each date in bdate at hour hr into filedir.
    if not os.path.isdir(filedir):
        os.makedirs(filedir)
        print('created folder: {}'.format(filedir))

    flist = []
    for i, day in enumerate(bdate):
        webdir = day[0:6]  # YYYYMM portion of the YYYYMMDD date string
        fname = 'narr-a_221_%s_%s00_000.grb' % (day, hr)
        flist.append('%s/%s' % (filedir, fname))
        weburl = 'http://nomads.ncdc.noaa.gov/data/narr/%s/%s/%s' % (
            webdir, day, fname)
        dname = '%s/%s' % (filedir, fname)
        print('Downloading %d of %d: %s' % (i + 1, len(bdate), fname))
        if not os.path.exists(dname):
            urllib.request.urlretrieve(weburl, dname)

    return flist
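A hypothetical call, assuming bdate holds 'YYYYMMDD' date strings and hr is a zero-padded hour string:

# Illustrative values only.
files = NARRdload(['20120101', '20120102'], '00', './narr_data')
print(files)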
import csv
import requests
from bs4 import BeautifulSoup, SoupStrainer

with open('../ExtractedNumbers.csv', 'r') as serials:
    fieldnames = ['Serial_Number', 'Owner']
    reader = csv.DictReader(serials, fieldnames=fieldnames)
    for row in reader:
        try:
            response = requests.get(
                "http://www.utahcounty.gov/LandRecords/Property.asp?av_serial="
                + row['Serial_Number'])
        except requests.RequestException:
            continue
print('Done')

only_td_tags = SoupStrainer("td")
soup = BeautifulSoup(response.text, "html.parser", parse_only=only_td_tags)
targetCell = soup.find(text="Mailing Address:")
print(targetCell.parent.parent.text)
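The script above only parses the final response; a sketch of a helper that could instead be applied to each page inside the loop, assuming the same table layout:

# Hypothetical helper: extract the mailing-address cell from one page's HTML.
def mailing_address(html):
    cells = BeautifulSoup(html, "html.parser", parse_only=SoupStrainer("td"))
    cell = cells.find(text="Mailing Address:")
    return cell.parent.parent.text if cell else None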
Example #5
# Fragment of a larger script; user_id, result, urllist_set,
# word_count, and image_count are defined earlier in the original.
fo = open("/Users/Personals/%s" % user_id, "wb")
fo.write(result)  # result is assumed to be bytes
fo.close()
word_path = os.getcwd() + '/%s' % user_id
print('Finished crawling the text posts.')

fo2 = open("/Users/Personals/%s_imageurls" % user_id, "w")
for eachlink in urllist_set:
    fo2.write(eachlink + "\n")
fo2.close()
print('Finished collecting the image links.')

if not urllist_set:
    print('No images on this page.')
else:
    # Download the images into the weibo_image folder under the current directory.
    image_path = os.getcwd() + '/weibo_image'
    if not os.path.exists(image_path):
        os.mkdir(image_path)
    x = 1
    for imgurl in urllist_set:
        temp = image_path + '/%s.jpg' % x
        print('Downloading image %s' % x)
        try:
            urllib.request.urlretrieve(
                urllib.request.urlopen(imgurl).geturl(), temp)
        except Exception:
            print("Failed to download image: %s" % imgurl)
        x += 1

print('Finished crawling original posts: %d in total, saved to %s'
      % (word_count - 4, word_path))
print('Finished crawling images: %d in total, saved to %s'
      % (image_count - 1, image_path))
Example #6
# Scrape images from a page and save them locally.

import re
import urllib3
import urllib.request


# Fetch the page HTML.
def getHtml(url):
    http = urllib3.PoolManager()
    request = http.request('GET', url)
    html = str(request.data, encoding="utf-8")
    return html


# Extract image URLs with a regular expression.
def getImg(html):
    reg = r'src="(.+?\.jpg)" pic_ext'
    imgre = re.compile(reg)
    imglist = re.findall(imgre, html)
    # The download loop at the bottom saves each image locally.
    return imglist


html = getHtml("http://tieba.baidu.com/p/2460150866")
# print(html)
imglist = getImg(html)
print(imglist)
x = 0
for imgurl in imglist:
    # Save each image locally.
    urllib.request.urlretrieve(imgurl, '/Applications/MAMP/image/%s.jpg' % x)
    x += 1
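An alternative sketch that reuses the urllib3 PoolManager from getHtml for the downloads, under the same assumption about the save path:

http = urllib3.PoolManager()
for x, imgurl in enumerate(imglist):
    data = http.request('GET', imgurl).data  # raw image bytes
    with open('/Applications/MAMP/image/%s.jpg' % x, 'wb') as f:
        f.write(data)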
Example #7
from selenium import webdriver
from selenium.webdriver.common.by import By
import urllib.request

driver = webdriver.Chrome('chromedriver.exe')
word = "coffee"
url = "http://images.google.com/search?q="+word+"&tbm=isch&sout=1"
driver.get(url)
imageXpathSelector = '//*[@id="islrg"]/div[1]/div[1]/a[1]/div[1]/img'
img = driver.find_element(By.XPATH, imageXpathSelector)

src = img.get_attribute('src')
urllib.request.urlretrieve(src, word + ".jpg")
driver.close()
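Google Images often serves thumbnails as base64 data: URIs rather than plain http URLs; a sketch that decodes such a src directly, as an alternative to fetching it:

import base64

# If src is a data: URI, decode the base64 payload instead of downloading it.
if src.startswith('data:image'):
    with open(word + ".jpg", "wb") as f:
        f.write(base64.b64decode(src.split(',', 1)[1]))
else:
    urllib.request.urlretrieve(src, word + ".jpg")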