def upload_cos(path, file_name, pic_id, pic_type):
    """Upload one local picture to the COS bucket and return its URL.

    The object key is ``<pic_id>_<pic_type>.<extension>``, where the
    extension is the text after the last dot of ``file_name``.

    Args:
        path: Directory holding the file. It is concatenated directly with
            ``file_name``, so it must already end with a path separator.
        file_name: Name of the file inside ``path``.
        pic_id: Identifier used as the first part of the object key.
        pic_type: Type code used as the second part of the object key.

    Returns:
        The configured ``sys`` / ``address`` value followed by the object key
        on success, or the string ``'error'`` if anything fails (missing
        extension, unreadable file, upload failure).
    """
    try:
        # Take the text after the LAST dot so names such as "a.b.jpg" keep
        # their real extension. A name without a dot raises IndexError here
        # and is reported through the common error path below.
        extension = file_name.rsplit('.', 1)[1]
        pic_name = str(pic_id) + "_" + str(pic_type) + '.' + extension
        with open(path + file_name, 'rb') as fp:
            # NOTE(review): the object is stored with an HTML content type
            # even though it is a picture -- confirm this is intentional.
            client.put_object(
                Bucket='picture-1256975408',
                Body=fp,
                Key=pic_name,
                StorageClass='STANDARD',
                ContentType='text/html; charset=utf-8')
        url = read_conf.get_conf('sys', 'address') + pic_name
        return url
    except Exception as err:
        # Best-effort contract: callers compare the result with 'error'
        # instead of handling exceptions.
        print('failed to upload picture to cos:%s' % file_name, err)
        return 'error'
def scan_day_picture(date):
    """Fetch the site's home page and pass its menu section to the crawler.

    Args:
        date: Passed through unchanged to ``scan_pic_page``
            (presumably a ``YYYY-MM-DD`` string -- confirm against callers).

    Returns:
        None. If the home page cannot be fetched, the error is printed and
        the function returns without crawling.
    """
    # Make sure the wallpaper storage directory exists.
    path = read_conf.get_conf('sys', 'path')
    if not os.path.isdir(path):
        os.makedirs(path)

    # Download the home page.
    request = urllib2.Request(url, headers=headers)
    try:
        response = urllib2.urlopen(request).read()
    except socket.timeout as e:
        print(type(e))
        # Without a page there is nothing to parse; continuing would hit an
        # unbound ``response`` below.
        return
    except Exception as err:
        print('a', str(err))
        return

    # The page body is decoded as GBK.
    response = response.decode('gbk')

    # Extract the source of the home-page menu.
    pat_menu = re.compile('<ul class="menu">(.*?)</div></li>', re.S)
    # NOTE(review): re.search returns None when the menu markup is absent;
    # verify how scan_pic_menu / scan_menu_link handle that.
    code_menu = re.search(pat_menu, response)
    big_title = scan_pic_menu(code_menu)
    menu_link_new = scan_menu_link(code_menu)
    scan_pic_page(menu_link_new, path, big_title, date)
# -*- coding:utf-8 -*-
import os
import random
import re
import socket
import sys
import time
import urllib2

from conf import read_conf

# Python 2 idiom: reload(sys) makes sys.setdefaultencoding available again so
# the process-wide default string encoding can be switched to GBK.
reload(sys)
sys.setdefaultencoding('gbk')

# Socket-level default timeout: 30 seconds.
socket.setdefaulttimeout(30)

# Make sure the wallpaper storage directory exists.
path = read_conf.get_conf('sys', 'path')
if not os.path.isdir(path):
    os.makedirs(path)

# Menu titles (filled in elsewhere).
big_title = []

# Address of the site's home page.
url = read_conf.get_conf('http', 'url')

# The header value must be the bare product string: the original value began
# with a literal "User-Agent:" prefix, which was sent as part of the value.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/56.0.2924.87 Safari/537.36'
}
request = urllib2.Request(url, headers=headers)
# 2. 读取文件夹图片,保存到fastfds和mysqlimport time import time import urllib2 import re import os import socket import random import sys from conf import read_conf reload(sys) sys.setdefaultencoding('gbk') socket.setdefaulttimeout(30) # 设置socket层的超时时间为20秒 # 网站首页地址 url = read_conf.get_conf('http', 'url') headers = { 'User-Agent': 'User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) ' 'Chrome/56.0.2924.87 Safari/537.36' } def scan_day_picture(date): # 首先创建壁纸的存放目录 path = read_conf.get_conf('sys', 'path') if not os.path.isdir(path): os.makedirs(path) # 目录 request = urllib2.Request(url, headers=headers)
# -*- coding:utf-8 -*- import os import sys import time import picture_util from conf import read_conf from dao import picture_dao from dao.picture_dao import update_picture from dao.upload_cos import upload_cos from concurrent.futures import ThreadPoolExecutor reload(sys) sys.setdefaultencoding('utf-8') # path = "D:/picture/彼岸壁纸/" path = read_conf.get_conf('sys', 'path') # upath = unicode(path, 'utf-8') dirs = os.listdir(path) # type_id = {'美女': '0', '风景': '1', '唯美': '2', '动漫': '3', '游戏': '4', '人物': '5', # '动物': '6', '花卉': '7', '节日': '8', '可爱': '9', '汽车': '10', '日历': '11', # '设计': '12', '影视': '13', '军事': '14', '王者荣耀': '15', '鬼刀': '16', # '护眼': '17', '体育': '18', '其他': '19', '建筑': '20', '美食': '21', '水果': '22'} type_id = { 'meinv': '0', 'fengjing': '1', 'weimei': '2', 'dongman': '3', 'youxi': '4', 'renwu': '5', 'dongwu': '6',
logging.basicConfig()


def my_job():
    """Run the daily pipeline for today's date.

    Calls ``scan_pic_day.scan_day_picture`` and then
    ``scan_upload_day.upload_pic`` with the current local date formatted as
    ``YYYY-MM-DD``.
    """
    today = time.strftime('%Y-%m-%d', time.localtime())
    # Step 1: crawl the pictures for this date.
    scan_pic_day.scan_day_picture(today)
    # Step 2: read the saved pictures and upload / register them.
    scan_upload_day.upload_pic(today)


sched = BlockingScheduler()

# Cron schedule: every day of the week, at the hour / minute / second read
# from the 'job' configuration section.
_cron_fields = {
    'day_of_week': '0-6',
    'hour': read_conf.get_conf('job', 'hour'),
    'minute': read_conf.get_conf('job', 'minute'),
    'second': read_conf.get_conf('job', 'second'),
    'end_date': '2114-05-30',
}
sched.add_job(my_job, 'cron', **_cron_fields)

# Blocks the calling thread and runs the scheduler loop.
sched.start()
def upload_pic(date):
    """Register and upload every new picture stored under ``<path>/<type>/<date>/``.

    For each category directory found in the configured picture path, every
    regular file in its ``date`` sub-directory is inserted into the database,
    uploaded to COS together with a thumbnail from the ``small/``
    sub-directory (generated first if missing), and finally updated in the
    database with both URLs.

    Args:
        date: Name of the per-day sub-directory to process
            (presumably ``YYYY-MM-DD`` -- confirm against callers).

    Returns:
        None. Failures for a single picture are printed and that picture is
        skipped.
    """
    path = read_conf.get_conf('sys', 'path')
    # Category directory name -> numeric type code stored with each picture.
    type_id = {'美女': '0', '风景': '1', '唯美': '2', '动漫': '3', '游戏': '4', '人物': '5',
               '动物': '6', '花卉': '7', '节日': '8', '可爱': '9', '汽车': '10', '日历': '11',
               '设计': '12', '影视': '13', '军事': '14', '王者荣耀': '15', '鬼刀': '16',
               '护眼': '17', '体育': '18', '其他': '19', '建筑': '20', '美食': '21', '水果': '22'}

    for type_name in os.listdir(path):
        if '.DS_Store' == type_name:
            continue
        try:
            pic_type = type_id[type_name]
        except KeyError:
            print('failed to get type_name:%s' % type_name)
            continue

        type_path = path + type_name + '/' + date + '/'
        print(type_path)
        if not os.path.exists(type_path):
            continue

        for file_name in os.listdir(type_path):
            if '.DS_Store' == file_name:
                continue
            pic_path = type_path + file_name
            # Sub-directories (e.g. the thumbnail folder) are not pictures.
            if os.path.isdir(pic_path):
                continue

            # The title doubles as the description and as the duplicate key.
            title = file_name.split('.')[0]
            count = picture_dao.check_picture(title)
            if count >= 1:
                print('picture has been exist:%s' % title)
                continue

            # Store the picture record, using the file's ctime as its
            # creation time.
            create_time = picture_util.TimeStampToTime(os.path.getctime(pic_path))
            pic_id = picture_dao.insert_picture(title, title, "", "", 1920, 1080, 0,
                                                pic_type, create_time)
            print('success to insert picture id:%s title: %s' % (pic_id, title))

            # NOTE(review): the record is inserted before the uploads; if an
            # upload fails the row stays without URLs and the title check
            # above will presumably skip the file on later runs -- confirm.

            # Upload the original picture to COS.
            url = upload_cos(type_path, file_name, pic_id, pic_type)
            if url == 'error':
                print('failed to upload_cos big picture')
                continue

            # Generate the thumbnail if it does not exist yet, then upload it.
            small_pic_path = type_path + 'small/' + file_name
            if not os.path.exists(small_pic_path):
                picture_util.small_pic(type_path, file_name)
            small_url = upload_cos(type_path + 'small/', file_name,
                                   'small/' + str(pic_id), pic_type)
            if small_url == 'error':
                print('failed to upload_cos small picture')
                continue

            # Record both URLs on the picture row.
            update_picture(url, small_url, str(pic_id))