class csv_operator:
    """Writer for the click-statistics CSV file named by ``config().clicksaveexecl``.

    Defining this class has a side effect: the class body removes any existing
    statistics file and creates a fresh one that contains only the header row
    ``spm, maxclick, totalclick``.  Rows are appended later with
    :meth:`saveexecl`.
    """

    # Executed once, when the class body runs: drop the previous statistics
    # file and start a new one with the column titles.
    filename = config().clicksaveexecl
    if os.path.isfile(filename):
        try:
            os.remove(filename)
        except OSError as e:
            # Best effort: report the problem and let "w+" below truncate.
            print(e)
    # ``with`` closes the handle even when writing the header fails.
    with open(filename, "w+", newline='') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow(["spm", "maxclick", "totalclick"])

    @staticmethod
    def saveexecl(spm, maxclickcount, totalcount):
        """Append one ``[spm, maxclickcount, totalcount]`` row to the CSV file.

        Declared as a static method so it can be called both on the class
        (``csv_operator.saveexecl(...)``) and on an instance.
        """
        with open(config().clicksaveexecl, "a+", newline='') as csvFile:
            csv.writer(csvFile).writerow([spm, maxclickcount, totalcount])

    def __del__(self):
        """Close the class-level file handle; errors are deliberately ignored."""
        try:
            self.csvFile.close()
        except Exception:
            # Finalizers must not raise; closing is best effort only.
            pass
def __init__(self, platform, proxyType, proxyConfig, runType, isProxy=True, isDriver=True, rand=True):
    """Set up the user agent, optional proxy, optional driver and HTTP session.

    :param platform: platform name; a value starting with ``"h5"`` selects a
        user agent from ``USER_AGENTS_H5``, anything else from
        ``USER_AGENTS_WEB``.
    :param proxyType: forwarded to ``self.getProxy``.
    :param proxyConfig: forwarded to ``self.getProxy``.
    :param runType: stored unchanged on ``self.runType``.
    :param isProxy: when true a proxy is requested through ``self.getProxy``;
        otherwise ``self.proxy`` is the empty string.
    :param isDriver: when true ``self.config`` and ``self.driver`` are created.
    :param rand: forwarded to ``self.getProxy``.
    """
    # Stored in self.proxy whenever no usable proxy could be obtained.
    proxy_error = u"获取代理失败, 请检查代理配置!"

    self.UA = ua()
    if platform.startswith("h5"):
        self.ua = random.choice(self.UA.USER_AGENTS_H5)
    else:
        self.ua = random.choice(self.UA.USER_AGENTS_WEB)

    self.isProxy = isProxy
    if self.isProxy:
        self.proxy = self.getProxy(proxyType, proxyConfig, rand)
        if self.proxy and "ERR" not in self.proxy:
            try:
                # Sanity check only: the value must support split(":").
                self.proxy.split(":")
            except Exception:
                # Was a bare ``except:``, which also swallowed
                # KeyboardInterrupt and SystemExit.
                self.proxy = proxy_error
        else:
            self.proxy = proxy_error
        # Parenthesised single-argument form is valid on Python 2 and 3.
        print(u"当前使用的代理服务器:%s" % self.proxy)
    else:
        self.proxy = ""

    self.runType = runType
    self.data = data()
    if isDriver:
        self.config = config(platform, self.proxy)
        self.driver = self.config.getDriver()
    # NOTE(review): the session is created regardless of isDriver here;
    # confirm this matches the intended nesting of the original code.
    self.session = requests.session()
def producepicture(spm, startdate, enddate):
    """Build the click heat-map image for one spm and record its click counts.

    Rows for ``spm`` with ``dt`` between ``startdate`` and ``enddate`` and
    ``touch_type=2`` are read through ``DBUtil``.  Every click is mapped from
    its position relative to the entity box onto a canvas of
    ``config().heightborder`` x ``config().widthborder`` cells, and the cell it
    falls into is incremented.  The counts are scaled so the busiest cell
    becomes 255 and the result is written to
    ``imgs/<spm><startdate><enddate>.png``.  When at least 1000 clicks landed
    on the canvas, the maximum and total counts are also appended to the
    statistics CSV via ``csv_operator.saveexecl``.

    :param spm: spm identifier (string; also used in the output file name).
    :param startdate: inclusive lower bound for ``dt`` (string).
    :param enddate: inclusive upper bound for ``dt`` (string).
    """
    conn = DBUtil()
    tablename = conn.tablename
    print("现在处理的spm:", spm)
    # Only the table name is concatenated; the values are bound as parameters.
    sql = "SELECT slideend_x,slideend_y,entity_x,entity_y,entity_width,entity_height FROM " \
        + tablename + " where spm=%s and dt>=%s and dt<=%s and touch_type=2 order by pos limit 0,100000; "
    args = (spm, startdate, enddate)
    results = conn.executesearch(sql, args)

    conf = config()
    # Rows are the height axis, columns the width axis.
    processim = np.zeros([conf.heightborder, conf.widthborder], dtype=int)
    count = 0
    for data in results:
        # Skip rows whose entity box has a zero width or height (would divide by zero).
        if judegzero(data[4], data[5]) == 0:
            continue
        # Click position minus box origin, divided by the box size, scaled to the canvas.
        x = int((data[0] - data[2]) / data[4] * conf.widthborder)
        y = int((data[1] - data[3]) / data[5] * conf.heightborder)
        if not judgeoutborder(x, y):
            continue
        count += 1
        processim[y, x] += 1
        if count % 1000 == 0:
            print("处理数据进度:", count)
            print(str(data))

    maxcount = np.max(processim)
    print("最大点击次数为:", maxcount)
    if maxcount > 0:
        processim = processim * 255 / maxcount
    else:
        # No click landed on the canvas: scaling would compute 0/0 and fill
        # the image with NaN.  Keep an all-zero image of the same float dtype
        # that the scaled path produces.
        processim = processim.astype(float)

    new_path = "imgs/" + spm + startdate + enddate + ".png"
    print("总点击次数为:", count)
    if count >= 1000:
        csv_operator.saveexecl(spm, maxcount, count)
    # NOTE(review): the image is written for every spm, not only those with
    # >= 1000 clicks; confirm this matches the intended nesting.
    cv2.imwrite(new_path, processim)
def judgeoutborder(width, height):
    """Report whether a point lies inside the configured canvas.

    Returns 1 when ``0 <= width < config().widthborder`` and
    ``0 <= height < config().heightborder``; otherwise returns 0.  The upper
    bounds are exclusive.
    """
    settings = config()
    max_height = settings.heightborder
    max_width = settings.widthborder

    inside = 0 <= height < max_height and 0 <= width < max_width
    return 1 if inside else 0
def load_all_image(imagepath, nameList, h, w, c, create_npy=False):
    """Load the named images into one normalised array.

    Each image is read with ``load_images(imagepath + "/" + name)`` and copied
    into channel 0 of its slot; any further channels stay zero.

    :param imagepath: directory containing the images.
    :param nameList: indexable collection of image file names.
    :param h: image height used for the output array.
    :param w: image width used for the output array.
    :param c: number of channels allocated in the output array.
    :param create_npy: accepted for interface compatibility; not used here.
    :return: array of shape ``(len(nameList), h, w, c)`` with values divided
        by 255.0.
    """
    all_size = len(nameList)  # number of images to load
    # Allocate the whole batch up front.
    all_data = np.zeros((all_size, h, w, c), dtype="uint8")
    for i in range(all_size):
        # The per-image ``config()`` construction was dropped: its result was never used.
        tmp_img = load_images(imagepath + "/" + str(nameList[i]))
        all_data[i, :, :, 0] = tmp_img[:, :]
    all_data = all_data / 255.0  # normalise
    print("load picture is over!")
    return all_data
def spmlist(startdate, enddate):
    """Return the distinct spm values recorded between two dates.

    Queries the table named by ``config().tablename`` for rows with
    ``touch_type=2`` whose ``dt`` lies in ``[startdate, enddate]`` and returns
    whatever ``DBUtil.executesearch`` yields for that query.
    """
    db = DBUtil()
    table = config().tablename
    # Only the table name is spliced in; the dates are bound as parameters.
    query = "".join((
        "SELECT distinct spm FROM ",
        table,
        " where dt>=%s and dt<=%s and touch_type=2 ; ",
    ))
    return db.executesearch(query, (startdate, enddate))
def removedir():
    """Pass the configured low and high image locations to ``removeimgs``, in that order."""
    settings = config()
    for attr in ("lowlocation", "highlocation"):
        removeimgs(getattr(settings, attr))
# coding=utf-8 ############################################### #该文件将采集到的点击数据转换成图片,得到的数据被保存在hotmap.conf指定的highlocation和lowlocation路径下边 #highlocation路径下边的图片供predict预测,lowlocation路径下边下边的图片暂时不提供使用方法 ############################################### import datetime import numpy as np import scipy.misc import os import shutil from sys import argv from conf.config import config from store_to_execl import csv_operator import re conf = config() def judegzero(width, height): if width != 0 and height != 0: return 1 else: return 0 def judgeoutborder(width, height): conf = config() heightborder = conf.heightborder widthborder = conf.widthborder del conf if height < heightborder and width < widthborder: #注意这里是小于哦 if height >= 0 and width >= 0: #点击位置
from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait from gevent.pool import Pool from gevent import monkey, sleep import requests from queue import Queue from conf.config import config import os import hashlib import logging from umei import * monkey.patch_all() c = config() c.db_conf('mongodb://localhost:27017', db='umei_net', collection='umei2') browser = c.browser mongodb = c.mongodb imgInfo = { 'ParentPage': '', 'ParentUrl': '', 'ArticleTitle': '', 'ArticleTime': '', 'ArticleGenre': '', 'ImageUrl': [] } class Config:
def saveexecl(spm, maxclickcount, totalcount):
    """Append one ``[spm, maxclickcount, totalcount]`` row to the statistics CSV.

    The target file is the one named by ``config().clicksaveexecl``; it is
    opened in append mode.

    :param spm: spm identifier written to the first column.
    :param maxclickcount: value written to the second column.
    :param totalcount: value written to the third column.
    """
    filename = config().clicksaveexecl
    # ``with`` closes the handle even if writing the row raises.
    with open(filename, "a+", newline='') as csvFile:
        csv.writer(csvFile).writerow([spm, maxclickcount, totalcount])