# ===== Example 1 =====
# coding:utf8
import urllib
import urllib2
import requests
import time
import logging
from CuteScrapy.util.logger import getLogger
from scrapy.utils import project

logging = getLogger('DownloadHelper')


# Python开发中时长遇到要下载文件的情况,最常用的方法就是通过Http利用urllib或者urllib2模块。
# 当然你也可以利用ftplib从ftp站点下载文件。此外Python还提供了另外一种方法requests。
# 下面来看看三种方法是如何来下载zip文件的:

class Download():
    def __init__(self):
        """Load the scrapy project settings and cache the download directory path."""
        settings = project.get_project_settings()
        self.settings = settings
        self.path = settings.get("DOWNLOAD_DIR")

    def download2(self, url, name):
        """Download `url` to local file `name` with urllib.urlretrieve, logging the elapsed time."""
        started = time.time()
        urllib.urlretrieve(url, name)
        elapsed = time.time() - started
        logging.info('[%s]download  spend %s seconds' % (name, elapsed))

    def download1(self, url, name):
        start = time.time()
        f = urllib2.urlopen(url)
# ===== Example 2 =====
# coding:utf8
import hashlib
import json
import datetime
import traceback
import requests
import time

from CuteScrapy.model.wxindex import WXIndex, WXIndexModel
from CuteScrapy.util.MysqlUtils import ORM
from CuteScrapy.util.logger import getLogger

logging = getLogger('WXSearch')


class WXSearch():
    def __init__(self):
        """Open a database session via the project ORM and prepare the WXIndex accessor."""
        orm = ORM()
        self.orm = orm
        self.session = orm.getSession()
        self.wxindex = WXIndex()

    def run(self, keywords):
        logging.info(keywords)
        now = time.time()
        end_time = str('%.3f' % (now - 24 * 3600))
        start_time = str('%.3f' % (now - 90 * 24 * 3600))
        o = 1490609811174
        headers = {
            'Cookie':
            WXIndexModel.getCookies().encode('utf8'),
            'Referer':
# ===== Example 3 =====
# coding:utf8
import time

from CuteScrapy.model.proxy import Proxy
from CuteScrapy.util.CommonParser import CommonParser
from CuteScrapy.util.logger import getLogger

__author__ = 'HuijunZhang'

logging = getLogger('ProxyCheck')


# Detect proxies that have gone stale and remove them from storage.
class ProxyCheck():
    """Probe every stored proxy; delete the ones that no longer respond."""

    def __init__(self):
        self.proxy = Proxy()
        self.commonParser = CommonParser()

    def run(self):
        """Check each proxy once, dropping dead entries, pausing 5s between probes."""
        for entry in self.proxy.getProxyData():
            outcome = CommonParser().check_proxy(entry.type, entry.id)
            if not outcome.get('status'):
                # probe failed: remove the proxy record and log the result
                if self.proxy.delByid(entry.id):
                    logging.info('id:%s is expires.' % entry.id)
                else:
                    logging.error('id:%s,delete failed' % entry.id)
            time.sleep(5)


if __name__ == '__main__':
# ===== Example 4 =====
# coding:utf8
import zbar
from PIL import Image
import urllib
import cStringIO
import json
import time
import requests
from CuteScrapy.util.logger import getLogger

__author__ = 'HuijunZhang'

ak = '05Unerzh8DGNMf78det8fZB2cPSQLVv3'  # 百度前端ak
logging = getLogger('commonparser')


class CommonParser():
    def __init__(self):
        """Stateless helper; nothing to initialize."""
        pass

    def trim(self, string):
        if not string:
            return string
        string = string.replace(u'\r', u'').replace(u'\n', u'').replace(u'\t', u'')
        return string.strip()

    def parseLocationByIp(self, ip):
        '''
        http://lbsyun.baidu.com/index.php?title=webapi/ip-api
        :param ip:
        :return:
# ===== Example 5 =====
import MySQLdb
import time

import datetime

from CuteScrapy.util.CommonParser import CommonParser
from CuteScrapy.util.logger import getLogger

__author__ = 'zhanghj'
from CuteScrapy.resource.ResourceHelper import ResourceHelper
import requests
from lxml import etree
import json
import re

logging = getLogger('UpdateCityToLat')


# 城市转化为json
class UpdateCityToLat():
    def __init__(self):
        """Create the shared parser and open a local MySQL connection to the scrapy DB."""
        self.commonParser = CommonParser()
        # NOTE(review): credentials are hard-coded/blank — consider moving to settings.
        self.my_conn = MySQLdb.connect(
            host='127.0.0.1',
            port=3306,
            user='******',
            passwd='',
            db='scrapy',
            charset="utf8",
        )

    def run(self):