def getData():
    """Return today's article, music and movie records as plain dicts.

    Each of the three tables is queried for the row whose ``date`` column
    equals the value of ``getToday()``. A row that is found is converted to
    a dict keyed by field name; when ``db.one`` returns ``None`` the
    corresponding entry stays ``None``.

    Returns:
        dict: ``{'article': ..., 'music': ..., 'movie': ...}`` where each
        value is either a dict of fields or ``None``.
    """
    db = DB()

    # Evaluate getToday() once so the three lookups cannot disagree on the
    # date if its value changes between calls.
    params = (getToday(),)

    # get article
    article = db.one("SELECT * FROM article WHERE date=?", params)
    if article is not None:
        # pic TEXT, content TEXT
        # (index 0 is skipped -- presumably the date column; confirm schema)
        article = {
            'pic': article[1],
            'content': article[2],
        }

    # get music
    music = db.one("SELECT * FROM music WHERE date=?", params)
    if music is not None:
        # name TEXT, artist TEXT, pic TEXT, link TEXT
        music = {
            'name': music[1],
            'artist': music[2],
            'pic': music[3],
            'link': music[4],
        }

    # get movie
    movie = db.one("SELECT * FROM movie WHERE date=?", params)
    if movie is not None:
        # name TEXT, pic TEXT, type TEXT, score INT, plot TEXT, link TEXT
        movie = {
            'name': movie[1],
            'pic': movie[2],
            'type': movie[3],
            'score': movie[4],
            'plot': movie[5],
            'link': movie[6],
        }

    return {'article': article, 'music': music, 'movie': movie}
def getData():
    """Look up today's article, music and movie rows and map them to dicts.

    For every table the row whose ``date`` matches ``getToday()`` is fetched
    with ``db.one``. A found row is turned into a dict whose keys are the
    field names listed below, read from row indices 1, 2, ... in order.
    When ``db.one`` returns ``None`` the entry is left as ``None``.

    Returns:
        dict: keys ``'article'``, ``'music'`` and ``'movie'``, each mapped
        to a dict of fields or to ``None``.
    """
    # (result key, query, field names for row indices 1..n).
    # Index 0 is never returned -- presumably it is the date column used in
    # the WHERE clause; confirm against the table schema.
    lookups = (
        # pic TEXT, content TEXT
        ('article',
         "SELECT * FROM article WHERE date=?",
         ('pic', 'content')),
        # name TEXT, artist TEXT, pic TEXT, link TEXT
        ('music',
         "SELECT * FROM music WHERE date=?",
         ('name', 'artist', 'pic', 'link')),
        # name TEXT, pic TEXT, type TEXT, score INT, plot TEXT, link TEXT
        ('movie',
         "SELECT * FROM movie WHERE date=?",
         ('name', 'pic', 'type', 'score', 'plot', 'link')),
    )

    db = DB()
    # One getToday() call shared by all queries, so the three results always
    # refer to the same date.
    today = getToday()

    result = {}
    for key, query, fields in lookups:
        row = db.one(query, (today,))
        if row is not None:
            # Plain indexing (not slicing/zip) so a short row still raises
            # instead of silently producing a partial dict.
            row = {
                field: row[position]
                for position, field in enumerate(fields, start=1)
            }
        result[key] = row
    return result
import sys
sys.path.append('..')

from tools.spider import Spider
from tools.db import DB
from tools.public import *
from lxml import etree

# Page that is crawled for the daily article.
url = 'http://wufazhuce.com/'

# XPath of the image source inside the active carousel item.
_PIC_XPATH = (
    '//div[@class="carousel-inner"]/div[@class="item active"]'
    '/a/img/@src'
)
# XPath of the quote text inside the active carousel item.
_TEXT_XPATH = (
    '//div[@class="carousel-inner"]/div[@class="item active"]'
    '/div[@class="fp-one-cita-wrapper"]/div[@class="fp-one-cita"]/a/text()'
)


def _only_match(matches):
    """Return the single element of *matches*, or "" unless there is exactly one."""
    return matches[0] if len(matches) == 1 else ""


def main():
    """Store today's article (picture URL and text) if it is not stored yet.

    The ``article`` table is checked first for a row dated ``getToday()``;
    only when none exists is the page at ``url`` crawled, parsed with lxml
    and inserted as ``(date, picture URL, text)``. A field whose XPath does
    not yield exactly one match is stored as an empty string.
    """
    # Single getToday() call so the existence check and the insert always
    # use the same date value.
    today = getToday()

    db = DB()
    if db.one("SELECT * FROM article WHERE date=?", (today,)) is not None:
        # Already stored: skip the crawl, nothing would be written anyway.
        return

    html = Spider().crawl(url)
    selector = etree.HTML(html)

    # Separate local names: the module-level ``url`` constant is no longer
    # overwritten with the scraped image source.
    pic = _only_match(selector.xpath(_PIC_XPATH))
    text = _only_match(selector.xpath(_TEXT_XPATH))

    db.execute("INSERT INTO article VALUES (?, ?, ?)", (today, pic, text))


if __name__ == '__main__':
    main()
from tools.public import *
import random

# NOTE(review): Spider and DB are not imported in the visible part of this
# script; presumably they arrive via the star import above or via lines
# outside this view -- confirm.

# HTTP headers sent with the playlist request.
headers = {
    'Cookie': 'appver=1.5.0.75771;',
    'Referer': 'http://music.163.com/'
}
# Query parameters of the playlist request.
payload = {
    'id': '140330894',
    'updateTime': -1
}
url = 'http://music.163.com/api/playlist/detail'


def main():
    """Store one randomly chosen playlist track as today's music.

    If the ``music`` table has no row dated ``getToday()``, the playlist at
    ``url`` is requested with ``headers`` and ``payload``. When the response
    has ``code == 200`` and a non-``None`` ``result``, one entry of
    ``result['tracks']`` is picked at random and inserted as
    ``(date, name, first artist name, album picture URL, mp3 URL)``.
    Nothing is written when the row already exists, the response is not
    usable, or the track list is empty.
    """
    # Single getToday() call so the existence check and the insert always
    # use the same date value.
    today = getToday()

    db = DB()
    if db.one("SELECT * FROM music WHERE date=?", (today,)) is not None:
        # Already stored: skip the request, nothing would be written anyway.
        return

    spider = Spider()
    data = spider.req(headers=headers, params=payload).crawl(url, pattern='json')
    if data['code'] != 200 or data['result'] is None:
        return

    tracks = data['result']['tracks']
    if not tracks:
        # Empty playlist: the old randint(0, trackCount - 1) raised here.
        return

    # Choose from the tracks actually returned. Indexing by the reported
    # 'trackCount' could run past the end of the list when the two differ.
    track = random.choice(tracks)

    insertData = (
        today,
        track['name'],
        track['artists'][0]['name'],
        track['album']['picUrl'],
        track['mp3Url'],
    )
    db.execute("INSERT INTO music VALUES (?, ?, ?, ?, ?)", insertData)


if __name__ == '__main__':
    main()