예제 #1
0
# -*- coding: utf-8 -*-
# !/usr/bin/env python
# Created by Vito on 9/21/15.
import time
import urllib2
import cStringIO
import os.path as op

from PIL import Image
from app.utils.common.views import make_md5
from app.utils.logs.views import get_file_logger
from config import PROJECT_ROOT

__author__ = 'Vito'

# Module-level logger shared by this module, obtained from the project helper
# get_file_logger under the name 'file_error'.
# NOTE(review): presumably file-backed, judging by the helper's name — confirm
# against app.utils.logs.views.
log = get_file_logger('file_error')


def save_image(image_url, title='MZ'):
    """
    通过url获取图片并且保存
    :param image_url: 图片url地址
    :param title: 图片标题
    :return: 存在数据库中的名字
    """
    try:
        file = urllib2.urlopen(image_url)
        tmpIm = cStringIO.StringIO(file.read())
        im = Image.open(tmpIm)

        name = make_md5(title + str(time.time())) + '.jpg'
예제 #2
0
# -*- coding: utf-8 -*-
# !/usr/bin/env python
# Created by Vito on 8/7/15.

import requests
from lxml import etree

from app import db
from app.storage.models import Storage
from app.utils.file.views import save_image
from app.utils.logs.views import get_file_logger

__author__ = 'Vito'

# Module-level logger shared by this module, obtained from the project helper
# get_file_logger under the name 'spider_web'.
# NOTE(review): presumably file-backed, judging by the helper's name — confirm
# against app.utils.logs.views.
log = get_file_logger('spider_web')

# Browser-style User-Agent string (Chrome 45 on OS X 10.10.5), assembled from
# adjacent string literals so that no backslash continuations are needed.
# NOTE(review): the trailing token is Safari/537.37 while the engine token is
# AppleWebKit/537.36 — possibly a typo; kept unchanged, confirm before editing.
user_agent = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/45.0.2454.93 Safari/537.37'
)


class SpiderWeb(object):
    @staticmethod
    def get_html(url='', headers=None, cookies=None, timeout=30, auth=None):
        """
        :param url: 目标url
        :param cookies: 浏览器 cookies
        :param timeout: 超时时间
        :param auth: 用户名密码 eg. auth=('user', 'pass')
        :param headers: 浏览器 headers
        :return: 目标url的html 内容