import json

from flask import request  # assumption: request.data plus the (body, status) return style matches Flask


def score():
    # Parse the JSON request body.
    jsonData = json.loads(request.data.decode())
    buffer = jsonData["buffer"]
    # Decode the transmitted buffer into an image and normalize to RGB
    # (convert() implies Util.to_binary returns a PIL image).
    image = Util.to_binary(jsonData["buffer"]).convert('RGB')
    # Run the scoring model, then build the extra result array.
    result = scoring(image)
    array = more(buffer, jsonData["gender"], result)
    return json.dumps({'score': str(round(result, 2)), 'result': array}), 200
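# --- Usage sketch (not in the original source) -------------------------------
# One way score() could be exercised with Flask's test client. The "/score"
# route, the app wiring, and the payload values are hypothetical; Util,
# scoring, and more must be importable for the handler to actually run.
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/score', view_func=score, methods=['POST'])

with app.test_client() as client:
    resp = client.post('/score', data=json.dumps({'buffer': '<base64 image bytes>',
                                                  'gender': 'female'}))
    print(resp.status_code, resp.get_data(as_text=True))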
import atexit
import chardet
import json
import logging
import os
import re
import sys

from classes.pycrawl import Pycrawl  # class paths follow the other crawlers in this project
from classes.util import Util
from urlspecify.urlsingle_fc import Urlsingle

global gstart_page
global gstart_pos

# Initialize the singleton helpers.
single = Urlsingle()
util = Util()
pc = Pycrawl()

# Initialize logging.
logging.basicConfig(filename='log\\2.24.log', level=logging.DEBUG)

header_dic = {}
header_dic['User-Agent'] = single._useragent_c
header_dic['Cookie'] = single._cookie_c

header_dic_ajax = {}
header_dic_ajax['User-Agent'] = single._useragent_c
header_dic_ajax['Cookie'] = single._cookie_c
header_dic_ajax['Referer'] = single._referer

header_dic_pic = {}
def get_pic_title_and_url(html_content):
    #pic_reg = r'<div class="c cl">(?:.*?)<a href="' + urlsingle._rooturl + r'(.*?)" onclick="atarget\(this\)" title="(.*?)" class="z">(?:.*?)<img src="http://(.*?)" alt'
    pic_reg = r'<div class="c cl">(?:.*?)<a href="' + urlsingle._rooturl + r'(.*?)" onclick="atarget\(this\)" title="(.*?)" class="z">(?:.*?)<img src="(.*?)"'
    print(pic_reg)
    pattern = re.compile(pic_reg, re.S)
    #f = open('out.txt', 'w')
    #print >>f, html_content
    return pattern.findall(html_content)

def get_code(html_content):
    reg = '<tbody>(?:.*?)<td>(.*?)</td>'
    pattern = re.compile(reg, re.S)
    return pattern.findall(html_content)[0]

# Initialize the singleton helpers.
urlsingle = Urlsingle()
util = Util()
pc = Pycrawl()

header_dic = {}
header_dic['User-Agent'] = urlsingle._useragent_f

for index in range(1, 18):
    url = urlsingle._scrapyurl % index
    html_content = pc.get_url_content(header_dic, url)
    # print chardet.detect(html_content)  # site is GB2312-encoded
    if html_content:
        #f = open('out.txt', 'w')
        #print >>f, html_content
        ret = get_pic_title_and_url(html_content)
        # print len(ret)
        save_path = "savejpdir\\" + str(index) + "\\"
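# --- Illustration (not in the original source) --------------------------------
# A self-contained check of the get_code() regex above; the HTML snippet is
# invented for the example, not taken from the scraped site.
import re

sample = '<tbody>\n  <tr><td>ABC123</td></tr>\n</tbody>'
print(re.compile('<tbody>(?:.*?)<td>(.*?)</td>', re.S).findall(sample)[0])  # -> ABC123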
# The scraped site is UTF-8 encoded.
# "notice" marks spots that need attention.
# "custom" marks spots each program has to adjust by hand.
# Key imports summarized below.
import re
import os
import logging
import sys
import chardet

from classes.pycrawl import Pycrawl
from classes.util import Util
from urlspecify.urlsingle_pmj import Urlsingle

pc = Pycrawl()
util = Util()
urlsingle = Urlsingle()

logging.basicConfig(filename='log\\pmj_d.log', level=logging.DEBUG)

header_dic = {}
header_dic['User-Agent'] = urlsingle._useragent_c
header_dic['Cookie'] = urlsingle._cookie_c

def get_urls(content):
    reg = '<img src="(.*?)"'
    pattern = re.compile(reg, re.S)
    return pattern.findall(content)

def get_pics(html_content):
import json
import os

from classes.crawl import Crawl
from classes.crawl_result import CrawlResult
from classes.util import Util
from string import Template

uti = Util()

def list(event, context):  # note: shadows the built-in list(); kept to match the deployed handler name
    results = uti.get_all_saved_results()
    content = ""
    for result in results:
        content += "<li> " + result["title"] + "<br /><a href='" + result["s3_path"] + "'>" + result["page_name"] + "</a> Created: " + result["created"] + "<br />"
        content += "<p> Original Link: " + result['crawl_url'] + "</p>"
        content += "</li><hr />"
    template = """
<html>
<head><title>Web Alert Results</title></head>
<body>
<h1>Web Alert Results</h1>
<ul>
"""
    template += content
    template += """
</ul>
</body>
</html>
"""
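# --- Illustration (not in the original source) --------------------------------
# The keys below are exactly what list() reads from each saved result; the
# values are hypothetical, to show what one rendered <li> entry looks like.
sample_result = {
    "title": "Example alert",
    "s3_path": "https://example-bucket.s3.amazonaws.com/page.html",
    "page_name": "page.html",
    "created": "2020-01-01 00:00:00",
    "crawl_url": "http://example.com/watched-page",
}
li = ("<li> " + sample_result["title"] + "<br /><a href='" + sample_result["s3_path"] + "'>"
      + sample_result["page_name"] + "</a> Created: " + sample_result["created"] + "<br />"
      + "<p> Original Link: " + sample_result["crawl_url"] + "</p></li><hr />")
print(li)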
import atexit
import json
import logging

from xlutils.copy import copy
from xlrd import open_workbook
from xlwt import easyxf
import xlwt

from classes.pycrawl import Pycrawl  # class paths follow the other crawlers in this project
from classes.util import Util

global gstart_page
global gstart_pos
global gstart_line

# Initialize the singleton helpers.
single = Urlsingle()
util = Util()
pc = Pycrawl()

# Initialize logging.
logging.basicConfig(filename='log\\fca.t.log', level=logging.DEBUG)

header_dic = {}
header_dic['User-Agent'] = single._useragent_c
header_dic['Cookie'] = single._cookie_c

header_dic_ajax = {}
header_dic_ajax['User-Agent'] = single._useragent_c
header_dic_ajax['Cookie'] = single._cookie_c
header_dic_ajax['Referer'] = single._referer
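# --- Illustration (not in the original source) --------------------------------
# The xlrd/xlwt/xlutils imports above support the usual "append rows to an
# existing .xls" pattern sketched here; the file name, sheet index, and cell
# values are hypothetical.
rb = open_workbook('results.xls', formatting_info=True)    # read the existing workbook
wb = copy(rb)                                              # make a writable xlwt copy
ws = wb.get_sheet(0)                                       # first sheet of the copy
style = easyxf('font: bold on')                            # e.g. bold cell text
ws.write(rb.sheet_by_index(0).nrows, 0, 'new row', style)  # append below the last row
wb.save('results.xls')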