Ejemplo n.º 1
0
    def process_item(self, item, spider):
        """Store a scraped note and the images it references.

        Items whose slug is in BLOCK_LIST are passed through untouched.
        For every other item, each image URL is recorded in the Image
        table and replaced inside the note content by a relative
        ``../images/<name>`` link; the note itself is then inserted into
        the Note table.

        Args:
            item: mapping with the keys "slug", "title", "url", "content",
                "images", "likes_count" and "views_count".
            spider: not used by this method.

        Returns:
            The same ``item`` object, in every case.

        Raises:
            ValueError: if "likes_count" or "views_count" cannot be
                converted with ``int()``.
        """
        if item["slug"] in BLOCK_LIST:
            return item

        print(u"抓取完毕: %s" % item["title"])

        # Point image links in the content at their local copies.
        content = item["content"]
        for img in item["images"]:
            path = get_image_name(img)
            if path is None:
                # No file name could be derived from this URL. Recording it
                # would store a row without a path and rewrite the link to
                # '../images/None', so keep the original URL instead.
                continue
            Image.insert(
                slug=item["slug"],
                url=img,
                path=path,
            ).execute()
            content = content.replace(img, '../images/%s' % path)

        try:
            Note.insert(
                title=item["title"],
                slug=item["slug"],
                url=item["url"],
                content=content,
                likes_count=int(item["likes_count"]),
                views_count=int(item["views_count"]),
            ).execute()
        except IntegrityError as e:
            # The insert was rejected (NOTE(review): presumably a note that
            # is already stored -- confirm against the Note schema); log it
            # and carry on.
            logger.warn('%s SKIP E: (%s)' % (dict(item), str(e)))

        return item
Ejemplo n.º 2
0
    def process_item(self, item, spider):
        """Persist one scraped note together with its image references.

        Blocked slugs (those listed in BLOCK_LIST) are returned as-is and
        nothing is written. Otherwise every image URL of the item gets an
        Image row and its occurrences in the content are replaced by
        ``../images/<name>``; afterwards the note is inserted into Note.

        Args:
            item: mapping providing "slug", "title", "url", "content",
                "images", "likes_count" and "views_count".
            spider: unused here.

        Returns:
            ``item`` itself, whether or not anything was stored.

        Raises:
            ValueError: when a count field is not convertible by ``int()``.
        """
        slug = item["slug"]
        if slug in BLOCK_LIST:
            return item

        print(u"抓取完毕: %s" % item["title"])

        # Rewrite remote image URLs in the content to local relative paths.
        content = item["content"]
        for img in item["images"]:
            path = get_image_name(img)
            if path is None:
                # get_image_name found no usable file name: do not store a
                # path-less Image row and do not turn the link into
                # '../images/None'; the original URL stays in the content.
                continue
            Image.insert(slug=slug, url=img, path=path).execute()
            content = content.replace(img, '../images/%s' % path)

        try:
            Note.insert(title=item["title"],
                        slug=slug,
                        url=item["url"],
                        content=content,
                        likes_count=int(item["likes_count"]),
                        views_count=int(item["views_count"])).execute()
        except IntegrityError as e:
            # Insert rejected by the database; log the item and move on.
            logger.warn('%s SKIP E: (%s)' % (dict(item), str(e)))

        return item
Ejemplo n.º 3
0
# -*- coding: utf-8 -*-
'''
File Name: jianshu/image.py
Author: JackeyGao
mail: [email protected]
Created Time: 五  1/ 8 14:50:27 2016
'''
import requests, shutil, re, sys
from jianshu.settings import DEFAULT_REQUEST_HEADERS as headers
from jianshu.db import Image

# Python 2 only: reload(sys) brings back sys.setdefaultencoding, which is
# removed from the module at interpreter startup, so that the process-wide
# default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')

# Runs a select on Image at import time and keeps the result at module level.
# NOTE(review): presumably every stored image row (peewee-style API) -- confirm.
images = Image.select().execute()

def get_image_name(url):
    """Derive a local file name from an image URL.

    The name is the first run in *url* shaped like
    ``<digits>-<word chars><any one char><word chars>``, for example
    ``1234-abcdef.jpg``. When that run ends in ``?imageMogr2`` the suffix
    is replaced by ``.jpg``.

    Args:
        url: image URL as a string.

    Returns:
        The file name, or None when *url* contains no such run.
    """
    # Raw string: '\d' and '\w' are invalid escapes in a plain literal.
    # The '.' must stay unescaped -- the '?imageMogr2' rewrite below can
    # only fire when '.' matched the '?'.
    match = re.search(r'\d+-\w+.\w+', url)
    if match is None:
        return None

    image_name = match.group(0)
    if 'imageMogr' in image_name:
        image_name = image_name.replace('?imageMogr2', '.jpg')

    return image_name

def request_image(url):
    image_name = get_image_name(url)
    print(u"正在下载 %s" % image_name)
    try:
        response = requests.get(url, headers=headers, 
Ejemplo n.º 4
0
# -*- coding: utf-8 -*-
'''
File Name: jianshu/image.py
Author: JackeyGao
mail: [email protected]
Created Time: 五  1/ 8 14:50:27 2016
'''
import requests, shutil, re, sys
from jianshu.settings import DEFAULT_REQUEST_HEADERS as headers
from jianshu.db import Image

# Python 2 idiom: sys.setdefaultencoding is deleted from sys at startup, so
# the module is reloaded to get it back before forcing UTF-8 as the default
# string encoding for the whole process.
reload(sys)
sys.setdefaultencoding('utf-8')

# Executed on import: the result of the Image select is kept as a module global.
# NOTE(review): looks like this loads all known image rows -- confirm.
images = Image.select().execute()


def get_image_name(url):
    """Return the file name to use for the image at *url*.

    The first substring of the form
    ``<digits>-<word chars><any one char><word chars>`` is taken as the
    name (e.g. ``1234-abcdef.jpg``). A trailing ``?imageMogr2`` in that
    substring is turned into a ``.jpg`` extension.

    Args:
        url: image URL as a string.

    Returns:
        The derived name, or None if no such substring exists in *url*.
    """
    # Raw literal avoids the invalid '\d' / '\w' string escapes. Do not
    # escape the '.': it is what lets the pattern capture '?imageMogr2',
    # which the replace below depends on.
    found = re.search(r'\d+-\w+.\w+', url)
    if not found:
        return None

    name = found.group(0)
    if 'imageMogr' not in name:
        return name
    return name.replace('?imageMogr2', '.jpg')


def request_image(url):
    image_name = get_image_name(url)
    print(u"正在下载 %s" % image_name)