# -*- coding: utf-8 -*- # !/usr/bin/env python # Created by Vito on 9/21/15. import time import urllib2 import cStringIO import os.path as op from PIL import Image from app.utils.common.views import make_md5 from app.utils.logs.views import get_file_logger from config import PROJECT_ROOT __author__ = 'Vito' log = get_file_logger('file_error') def save_image(image_url, title='MZ'): """ 通过url获取图片并且保存 :param image_url: 图片url地址 :param title: 图片标题 :return: 存在数据库中的名字 """ try: file = urllib2.urlopen(image_url) tmpIm = cStringIO.StringIO(file.read()) im = Image.open(tmpIm) name = make_md5(title + str(time.time())) + '.jpg'
# -*- coding: utf-8 -*- # !/usr/bin/env python # Created by Vito on 8/7/15. import requests from lxml import etree from app import db from app.storage.models import Storage from app.utils.file.views import save_image from app.utils.logs.views import get_file_logger __author__ = 'Vito' log = get_file_logger('spider_web') user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) ' \ 'AppleWebKit/537.36 (KHTML, like Gecko) ' \ 'Chrome/45.0.2454.93 Safari/537.37' class SpiderWeb(object): @staticmethod def get_html(url='', headers=None, cookies=None, timeout=30, auth=None): """ :param url: 目标url :param cookies: 浏览器 cookies :param timeout: 超时时间 :param auth: 用户名密码 eg. auth=('user', 'pass') :param headers: 浏览器 headers :return: 目标url的html 内容