def download_excel(req):
    """Export the active-videos table to an .xls file and stream it back.

    Optional GET parameters ``site``, ``search`` and ``page`` narrow the
    exported rows the same way the HTML table view does.

    Args:
        req: Django HttpRequest.

    Returns:
        HttpResponse whose body streams the generated Excel file.

    NOTE(review): ``Videos``, ``FileWrapper`` and ``HttpResponse`` are
    assumed to come from module-level imports not visible in this chunk.
    The temp file is left on disk after the response — confirm a cleanup
    job exists.
    """
    import os
    from spider.api import generate_id
    from spider.excel import ExcelWriter

    filename = generate_id() + '.xls'
    titles = ['No.', u'标题', u'获取时间', u'点击量', u'收藏', u'来源',
              u'评论', u'赞', u'踩', u'网址']
    # Only rows with status 'A' (active) joined to their site are exported.
    videos = Videos.objects.raw(
        "select * from spider_sites, spider_videos "
        "where spider_videos.site_id=spider_sites.id "
        "AND spider_videos.status='A';")
    data = [(i + 1, video.title,
             video.crawling_time.strftime('%Y-%m-%d %H:%M:%S'),
             video.playcount, video.favorite, video.ch_name,
             video.community, video.upcount, video.downcount, video.url)
            for i, video in enumerate(videos)]
    if req.GET:
        from spider.api import get_table_by_site, get_table_by_search, get_page
        # Bug fix: req.GET.get(...) returns None for a missing key, so the
        # old len(...) guard raised TypeError; and since .get returns the
        # whole string, the old [0] kept only its first character, breaking
        # multi-digit site ids and page numbers.
        site = req.GET.get('site')
        if site:
            data = get_table_by_site(data, int(site))
        search = req.GET.get('search')
        if search:
            data = get_table_by_search(data, search)
        # Robustness: only paginate when a page number was actually sent
        # (the old code crashed on any GET without 'page').
        page = req.GET.get('page')
        if page:
            data, has_next, page_count = get_page(data, int(page))
    ExcelWriter(filename, titles, data).write()
    wrapper = FileWrapper(open(filename, 'rb'))
    response = HttpResponse(wrapper, content_type='text/plain')
    response['Content-Length'] = os.path.getsize(filename)
    return response
def get_table_with_page_site_search(req, page, site, search, type):
    """Return one page of the videos table as a JSON string.

    The table is either loaded from the pre-built charts file or queried
    from the database, then narrowed by site and search term and sliced
    to the requested page.

    Args:
        req: Django HttpRequest (not read here beyond routing).
        page: 1-based page number to return.
        site: site id filter, converted with int() before dispatch.
        search: search string filter.
        type: 'charts' to load the cached table from disk; any other
            value queries the database. (Shadows the builtin ``type``,
            but the name is part of the caller-visible signature.)

    Returns:
        JSON string with keys result / has_next / has_pre / page_total /
        page_now.

    NOTE(review): ``json`` and ``Videos`` are assumed to be imported at
    module level outside this chunk — confirm.
    """
    from spider.api import get_page, get_table_by_search, get_table_by_site
    import spider.commvals as commvals

    if type == 'charts':
        # Pre-computed table serialized to disk; fix: the old code leaked
        # the file handle if read/parse raised — `with` always closes it.
        with open(commvals.TABLE_FOR_CHARTS, 'r') as f:
            table = json.loads(f.read())
    else:
        videos = Videos.objects.raw(
            "select * from spider_sites, spider_videos "
            "where spider_videos.site_id=spider_sites.id "
            "AND spider_videos.status='A';")
        table = [(i + 1, video.title,
                  video.crawling_time.strftime('%Y-%m-%d %H:%M:%S'),
                  video.playcount, video.favorite, video.ch_name,
                  video.community, video.upcount, video.downcount,
                  video.url, video.thumbnail)
                 for i, video in enumerate(videos)]
    page_now = page
    table = get_table_by_site(table, int(site))
    table = get_table_by_search(table, search)
    page, has_next, page_count = get_page(table, page_now)
    return json.dumps({'result': page,
                       'has_next': has_next,
                       'has_pre': page_now != 1,
                       'page_total': page_count,
                       'page_now': page_now})
import time start = time.time() data = [(i+1, video.title, video.crawling_time.strftime('%Y-%m-%d %H:%M:%S'), video.playcount, video.favorite, video.ch_name, video.community, video.upcount, video.downcount, video.url, video.thumbnail) for i, video in enumerate(videos)] # print 'get time:', time.time() - start # ExcelWriter('test.xls', titles, data).write() # print time.time() - start from spider.api import get_page, get_table_by_search, get_table_by_site page_now = 1 table = get_table_by_site(data, int(1)) table = get_table_by_search(table, "优酷") page, has_next, page_count = get_page(data, page_now) # print page # print has_next # print page_count import json print json.dumps({'result': page, 'has_next': has_next, 'has_pre': page_now != 1, 'page_total': page_count, 'page_now': page_now})