# coding:utf8 import urllib import urllib2 import requests import time import logging from CuteScrapy.util.logger import getLogger from scrapy.utils import project logging = getLogger('DownloadHelper') # Python开发中时长遇到要下载文件的情况,最常用的方法就是通过Http利用urllib或者urllib2模块。 # 当然你也可以利用ftplib从ftp站点下载文件。此外Python还提供了另外一种方法requests。 # 下面来看看三种方法是如何来下载zip文件的: class Download(): def __init__(self): self.settings = project.get_project_settings() # get settings self.path = self.settings.get("DOWNLOAD_DIR") def download2(self, url, name): start = time.time() urllib.urlretrieve(url, name) end = time.time() logging.info('[%s]download spend %s seconds' % (name, end - start)) # print 'download [%s] spend %s seconds' % (name, end - start) def download1(self, url, name): start = time.time() f = urllib2.urlopen(url)
# coding:utf8 import hashlib import json import datetime import traceback import requests import time from CuteScrapy.model.wxindex import WXIndex, WXIndexModel from CuteScrapy.util.MysqlUtils import ORM from CuteScrapy.util.logger import getLogger logging = getLogger('WXSearch') class WXSearch(): def __init__(self): self.orm = ORM() self.session = self.orm.getSession() self.wxindex = WXIndex() def run(self, keywords): logging.info(keywords) now = time.time() end_time = str('%.3f' % (now - 24 * 3600)) start_time = str('%.3f' % (now - 90 * 24 * 3600)) o = 1490609811174 headers = { 'Cookie': WXIndexModel.getCookies().encode('utf8'), 'Referer':
# coding:utf8 import time from CuteScrapy.model.proxy import Proxy from CuteScrapy.util.CommonParser import CommonParser from CuteScrapy.util.logger import getLogger __author__ = 'HuijunZhang' logging = getLogger('ProxyCheck') # 检测代理是否失效,并删除失效代理 class ProxyCheck(): def __init__(self): self.proxy = Proxy() self.commonParser = CommonParser() def run(self): for item in self.proxy.getProxyData(): result = CommonParser().check_proxy(item.type, item.id) if not result.get('status'): status = self.proxy.delByid(item.id) if not status: logging.error('id:%s,delete failed' % item.id) else: logging.info('id:%s is expires.' % item.id) time.sleep(5) if __name__ == '__main__':
# coding:utf8 import zbar from PIL import Image import urllib import cStringIO import json import time import requests from CuteScrapy.util.logger import getLogger __author__ = 'HuijunZhang' ak = '05Unerzh8DGNMf78det8fZB2cPSQLVv3' # 百度前端ak logging = getLogger('commonparser') class CommonParser(): def __init__(self): pass def trim(self, string): if not string: return string string = string.replace(u'\r', u'').replace(u'\n', u'').replace(u'\t', u'') return string.strip() def parseLocationByIp(self, ip): ''' http://lbsyun.baidu.com/index.php?title=webapi/ip-api :param ip: :return:
import MySQLdb
import time
import datetime
from CuteScrapy.util.CommonParser import CommonParser
from CuteScrapy.util.logger import getLogger

__author__ = 'zhanghj'

from CuteScrapy.resource.ResourceHelper import ResourceHelper
import requests
from lxml import etree
import json
import re

logging = getLogger('UpdateCityToLat')


# Convert city records to JSON (per the original comment); class name suggests
# resolving cities to latitude/longitude — confirm against the full file.
class UpdateCityToLat():
    def __init__(self):
        self.commonParser = CommonParser()
        # NOTE(review): hard-coded DB connection with a redacted user and an
        # empty password — credentials should come from settings/env.
        self.my_conn = MySQLdb.connect(host='127.0.0.1', port=3306, user='******', passwd='',
                                       db='scrapy', charset="utf8")

    # NOTE(review): source is truncated here — run()'s body is not visible.
    def run(self):