Ejemplo n.º 1
0
def score():
    """Score an uploaded image and return a JSON HTTP response.

    Decodes the JSON request body, converts the embedded image buffer
    to an RGB image, runs the scoring model, and returns the rounded
    score plus the auxiliary result array as a (body, status) tuple.
    """
    payload = json.loads(request.data.decode())
    raw_buffer = payload["buffer"]
    # Util.to_binary turns the transported buffer into an image; force RGB mode.
    rgb_image = Util.to_binary(raw_buffer).convert('RGB')
    model_score = scoring(rgb_image)
    extra = more(raw_buffer, payload["gender"], model_score)
    body = json.dumps({'score': str(round(model_score, 2)), 'result': extra})
    return body, 200
Ejemplo n.º 2
0
from urlspecify.urlsingle_fc import Urlsingle
import chardet
import re
import os
import logging
import sys

import atexit
import json

global gstart_page
global gstart_pos

# Initialise the singleton helpers (URL config, utilities, crawler).
sigle = Urlsingle()
util = Util()
pc = Pycrawl()

# Logging setup: everything from DEBUG upward goes to the dated log file.
logging.basicConfig(filename='log\\2.24.log', level=logging.DEBUG)

# Plain page requests need the stored user agent and session cookie.
header_dic = {
    'User-Agent': sigle._useragent_c,
    'Cookie': sigle._cookie_c,
}

# AJAX requests additionally require a Referer to be accepted by the site.
header_dic_ajax = {
    'User-Agent': sigle._useragent_c,
    'Cookie': sigle._cookie_c,
    'Referer': sigle._referer,
}

header_dic_pic = {}
Ejemplo n.º 3
0
    #pic_reg = '<div class="c cl">(?:.*?)<a href="'+single._rooturl+'(.*?)"  onclick="atarget\(this\)" title="(.*?)" class="z">(?:.*?)<img src="http://(.*?)" alt'
    pic_reg = '<div class="c cl">(?:.*?)<a href="'+single._rooturl+'(.*?)"  onclick="atarget\(this\)" title="(.*?)" class="z">(?:.*?)<img src="(.*?)"'
    print pic_reg
    patten = re.compile(pic_reg, re.S)
    #f=open('out.txt','w')
    #print >>f,html_content 
    return patten.findall(html_content)

def get_code(html_content):
    """Return the text of the first <td> cell that follows a <tbody> tag.

    Raises IndexError if the page contains no matching cell.
    """
    cell_pattern = re.compile('<tbody>(?:.*?)<td>(.*?)</td>', re.S)
    cells = cell_pattern.findall(html_content)
    return cells[0]

# Initialise the singleton helpers (URL config, utilities, crawler).
urlsingle = Urlsingle()
util = Util()
pc = Pycrawl()

# This site only requires a User-Agent header.
header_dic = {'User-Agent': urlsingle._useragent_f}

for index in range(1,18):
	url = urlsingle._scrapyurl%index
	html_content = pc.get_url_content(header_dic,url)
	# print chardet.detect(html_content) #GB2312
	if html_content:
		#f=open('out.txt','w')
		#print >>f,html_content 
		ret = get_pic_title_and_url(html_content)
		# print len(ret)
		save_path = "savejpdir\\"+str(index)+"\\"
Ejemplo n.º 4
0
#所爬取的网站utf-8编码

#notice表示需要注意的地方
#custom表示每个程序对于这个地方需要手工修改一下
#import重要总结
import re
import os
import logging
import sys
import chardet
from classes.pycrawl import Pycrawl
from classes.util import Util
from urlspecify.urlsingle_pmj import  Urlsingle

# Initialise the singleton helpers (crawler, utilities, URL config).
pc = Pycrawl()
util = Util()
urlsingle = Urlsingle()

# Logging setup: DEBUG and above go to the site-specific log file.
logging.basicConfig(filename='log\\pmj_d.log', level=logging.DEBUG)

# Requests need the stored user agent plus the session cookie.
header_dic = {
    'User-Agent': urlsingle._useragent_c,
    'Cookie': urlsingle._cookie_c,
}


def get_urls(content):
    """Return every <img> src attribute value found in *content*, in order."""
    return re.findall('<img src="(.*?)"', content, re.S)

def get_pics(html_content):
Ejemplo n.º 5
0
import json
import os

from classes.crawl import Crawl
from classes.crawl_result import CrawlResult
from classes.util import Util
from string import Template

# Shared Util instance used by the handlers below (project-local helper).
uti = Util()

def list(event, contenxt):
    results = uti.get_all_saved_results()
    
    content = ""
    
    for result in results:
        content += "<li> " + result["title"] + "<br /><a href='" + result["s3_path"] + "'>" + result["page_name"] + "</a> Created: " + result["created"] +"<br />"
        content += "<p> Original Link:" + result['crawl_url'] + "</p>" 
        content += "</li></hr />"
    
    template = """
        <html>
        <head><title>Web Alert Results</title></head>
        <body>
            <h1>Web Alert Results</h1>
            <ul>
        """
    
    template += content

    template += """
Ejemplo n.º 6
0
import atexit
import json

from xlutils.copy import copy
from xlrd import open_workbook
from xlwt import easyxf
import xlwt

global gstart_page
global gstart_pos
global gstart_line

# Initialise the singleton helpers (URL config, utilities, crawler).
sigle = Urlsingle()
util = Util()
pc = Pycrawl()

# Logging setup: DEBUG and above go to the site-specific log file.
logging.basicConfig(filename='log\\fca.t.log', level=logging.DEBUG)

# Plain page requests need the stored user agent and session cookie.
header_dic = {
    'User-Agent': sigle._useragent_c,
    'Cookie': sigle._cookie_c,
}

# AJAX requests additionally require a Referer to be accepted by the site.
header_dic_ajax = {
    'User-Agent': sigle._useragent_c,
    'Cookie': sigle._cookie_c,
    'Referer': sigle._referer,
}