Exemple #1
0
class csv_operator:
    """Append-only CSV store for per-spm click statistics.

    Defining this class (i.e. importing the module) resets the output file:
    an existing file at ``config().clicksaveexecl`` is removed and a new one
    containing only the header row is written.
    """

    # Runs once at class-definition time: delete the previous click-data file
    # and start a fresh one that holds only the column titles.
    filename = config().clicksaveexecl
    if os.path.isfile(filename):
        try:
            os.remove(filename)
        except OSError as e:
            # Best effort only: opening with "w+" below truncates the file
            # anyway, so a failed removal is reported and otherwise ignored.
            print(e)
    # The context manager closes the handle even if writing the header fails.
    with open(filename, "w+", newline='') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow(["spm", "maxclick", "totalclick"])

    @staticmethod
    def saveexecl(spm, maxclickcount, totalcount):
        """Append one ``[spm, maxclickcount, totalcount]`` row to the CSV file.

        Declared as a static method so it can be called on the class
        (``csv_operator.saveexecl(...)``) as well as on an instance.

        Args:
            spm: Value written to the "spm" column.
            maxclickcount: Value written to the "maxclick" column.
            totalcount: Value written to the "totalclick" column.
        """
        filename = config().clicksaveexecl
        # The file is re-opened in append mode for every row and closed by the
        # context manager, even when writing raises.
        with open(filename, "a+", newline='') as csvFile:
            csv.writer(csvFile).writerow([spm, maxclickcount, totalcount])

    def __del__(self):
        """Close the class-level file handle if it is somehow still open."""
        try:
            # ``self.csvFile`` resolves to the class attribute, which is
            # already closed; closing it again is a harmless no-op.
            self.csvFile.close()
        except Exception:
            # Never let an error escape from a finalizer.
            pass
Exemple #2
0
    def __init__(self,
                 platform,
                 proxyType,
                 proxyConfig,
                 runType,
                 isProxy=True,
                 isDriver=True,
                 rand=True):
        """Set up the user agent, optional proxy, optional driver and session.

        Args:
            platform: Platform name. Values starting with "h5" select a random
                entry from ``USER_AGENTS_H5``, anything else from
                ``USER_AGENTS_WEB``. Also forwarded to ``config``.
            proxyType: Forwarded unchanged to ``self.getProxy``.
            proxyConfig: Forwarded unchanged to ``self.getProxy``.
            runType: Stored as ``self.runType``.
            isProxy: When true, a proxy is requested through ``self.getProxy``;
                otherwise ``self.proxy`` is the empty string.
            isDriver: When true, ``self.config`` and ``self.driver`` are
                created; otherwise neither attribute is set.
            rand: Forwarded unchanged to ``self.getProxy``.
        """
        # Message stored in ``self.proxy`` whenever no usable proxy is found.
        proxy_error = u"获取代理失败, 请检查代理配置!"

        self.UA = ua()
        self.ua = random.choice(self.UA.USER_AGENTS_H5) if platform.startswith(
            "h5") else random.choice(self.UA.USER_AGENTS_WEB)
        self.isProxy = isProxy
        if self.isProxy:
            self.proxy = self.getProxy(proxyType, proxyConfig, rand)
            if self.proxy and "ERR" not in self.proxy:
                try:
                    # Only checks that the value supports ``split``; the
                    # result itself is discarded.
                    self.proxy.split(":")
                except Exception:
                    # ``Exception`` instead of a bare ``except`` so that
                    # KeyboardInterrupt / SystemExit are not swallowed.
                    self.proxy = proxy_error
            else:
                self.proxy = proxy_error
            # Parenthesised form prints the same text on Python 2 and 3.
            print(u"当前使用的代理服务器:%s" % self.proxy)
        else:
            self.proxy = ""

        self.runType = runType
        self.data = data()
        if isDriver:
            # NOTE(review): after a proxy failure ``self.proxy`` holds the
            # error message, which is still passed on to ``config`` here --
            # confirm that ``config`` copes with that value.
            self.config = config(platform, self.proxy)
            self.driver = self.config.getDriver()
        self.session = requests.session()
Exemple #3
0
def producepicture(spm, startdate, enddate):
    """Build the click heat map for one spm and record its click statistics.

    Rows with ``touch_type=2`` for ``spm`` between ``startdate`` and
    ``enddate`` (at most 100000) are read from the table named by
    ``DBUtil().tablename``. Each click is mapped into a
    ``heightborder`` x ``widthborder`` grid and counted per cell. When at
    least 1000 clicks fall inside the grid, the statistics are appended via
    ``csv_operator.saveexecl`` and the grid, scaled so the busiest cell is
    255, is written to ``imgs/<spm><startdate><enddate>.png``.

    Args:
        spm: Identifier to filter on; also part of the output file name.
        startdate: Lower bound for ``dt`` (inclusive); part of the file name.
        enddate: Upper bound for ``dt`` (inclusive); part of the file name.
    """
    conn = DBUtil()
    tablename = conn.tablename
    print("现在处理的spm:", spm)
    # Only the table name is concatenated; all values are bound parameters.
    sql = "SELECT slideend_x,slideend_y,entity_x,entity_y,entity_width,entity_height  FROM " \
          +tablename+" where spm=%s and dt>=%s and dt<=%s and touch_type=2 order by pos limit 0,100000; "
    args = (spm, startdate, enddate)
    results = conn.executesearch(sql, args)

    conf = config()
    # Rows are the height axis, columns the width axis.
    processim = np.zeros([conf.heightborder, conf.widthborder], dtype=int)
    count = 0
    for data in results:
        # Skip containers with a zero width or height (would divide by zero).
        if judegzero(data[4], data[5]) == 0:
            continue
        # Click position relative to the container, scaled to the grid size.
        x = int((data[0] - data[2]) / data[4] * conf.widthborder)
        y = int((data[1] - data[3]) / data[5] * conf.heightborder)
        # Ignore clicks that land outside the grid.
        if not judgeoutborder(x, y):
            continue
        count += 1
        processim[y, x] += 1
        if count % 1000 == 0:
            print("处理数据进度:", count)
            print(str(data))

    maxcount = np.max(processim)
    print("最大点击次数为:", maxcount)
    new_path = "imgs/" + spm + startdate + enddate + ".png"
    print("总点击次数为:", count)
    if count >= 1000:
        # Normalise only here: count >= 1000 guarantees maxcount >= 1, whereas
        # scaling unconditionally divided by zero when no click was counted.
        heatmap = processim * 255 / maxcount
        csv_operator.saveexecl(spm, maxcount, count)
        cv2.imwrite(new_path, heatmap)
Exemple #4
0
def judgeoutborder(width, height):
    """Return 1 if the point lies inside the configured grid, otherwise 0.

    Args:
        width: Horizontal coordinate, compared against ``widthborder``.
        height: Vertical coordinate, compared against ``heightborder``.

    Returns:
        1 when ``0 <= width < widthborder`` and ``0 <= height < heightborder``
        (upper bounds are exclusive), else 0.
    """
    settings = config()
    max_height = settings.heightborder
    max_width = settings.widthborder
    del settings
    inside = (height < max_height and width < max_width
              and height >= 0 and width >= 0)
    return 1 if inside else 0
Exemple #5
0
def load_all_image(imagepath, nameList, h, w, c, create_npy=False):
    """Load the named images into one array scaled by 1/255.

    Args:
        imagepath: Directory prefix; each file is read from
            ``imagepath + "/" + str(name)`` via ``load_images``.
        nameList: Names of the images to load.
        h: Height of the pre-allocated buffer.
        w: Width of the pre-allocated buffer.
        c: Number of channels of the buffer; only channel 0 is filled.
        create_npy: Currently unused; kept for interface compatibility.

    Returns:
        Array of shape ``(len(nameList), h, w, c)`` holding the uint8 buffer
        divided by 255.0.
    """
    all_size = len(nameList)
    # Pre-allocate the whole buffer once instead of growing it per image.
    all_data = np.zeros((all_size, h, w, c), dtype="uint8")
    for i, name in enumerate(nameList):
        tmp_img = load_images(imagepath + "/" + str(name))
        # Every image is written into channel 0 of its slot.
        all_data[i, :, :, 0] = tmp_img[:, :]
    all_data = all_data / 255.0
    print("load picture is over!")
    return all_data
Exemple #6
0
def spmlist(startdate, enddate):
    """Return the distinct spm values recorded in the given date range.

    Args:
        startdate: Lower bound for ``dt`` (inclusive).
        enddate: Upper bound for ``dt`` (inclusive).

    Returns:
        Whatever ``DBUtil.executesearch`` returns for the query, restricted to
        rows with ``touch_type=2``.
    """
    db = DBUtil()
    table = config().tablename
    # Only the table name is concatenated; the dates are bound parameters.
    query = ("SELECT distinct spm  FROM " + table +
             " where  dt>=%s and dt<=%s and touch_type=2  ; ")
    return db.executesearch(query, (startdate, enddate))
Exemple #7
0
def removedir():
    """Call ``removeimgs`` on the configured low and high image locations."""
    settings = config()
    # Same order as before: lowlocation first, then highlocation.
    for attr in ("lowlocation", "highlocation"):
        removeimgs(getattr(settings, attr))
Exemple #8
0
# coding=utf-8
###############################################
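#This file converts the collected click data into images; the results are saved under the highlocation and lowlocation paths specified in hotmap.conf.
#Images under highlocation are used by predict for prediction; no usage is currently provided for the images under lowlocation.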
###############################################
import datetime
import numpy as np
import scipy.misc
import os
import shutil
from sys import argv
from conf.config import config
from store_to_execl import csv_operator
import re
# Module-level configuration instance, created once at import time.
conf = config()


def judegzero(width, height):
    """Return 1 when both ``width`` and ``height`` are non-zero, else 0."""
    return 1 if width != 0 and height != 0 else 0


def judgeoutborder(width, height):
    conf = config()
    heightborder = conf.heightborder
    widthborder = conf.widthborder
    del conf
    if height < heightborder and width < widthborder:  #注意这里是小于哦
        if height >= 0 and width >= 0:  #点击位置
Exemple #9
0
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from gevent.pool import Pool
from gevent import monkey, sleep
import requests
from queue import Queue
from conf.config import config
import os
import hashlib
import logging
from umei import *

# Apply gevent's monkey patching.
# NOTE(review): this runs after `requests` and the other modules above have
# already been imported; gevent's documentation recommends patching as early
# as possible -- confirm that this ordering is intentional.
monkey.patch_all()

# Shared configuration object; db_conf presumably sets up the MongoDB
# connection for the given URI, database and collection -- verify in config.
c = config()
c.db_conf('mongodb://localhost:27017', db='umei_net', collection='umei2')

# Module-level handles taken from the configuration object.
browser = c.browser
mongodb = c.mongodb
# Image-info record with empty default values; presumably filled in per
# scraped article -- verify against the code that uses it.
imgInfo = {
    'ParentPage': '',
    'ParentUrl': '',
    'ArticleTitle': '',
    'ArticleTime': '',
    'ArticleGenre': '',
    'ImageUrl': []
}


class Config:
Exemple #10
0
 def saveexecl(spm, maxclickcount, totalcount):
     """Append one ``[spm, maxclickcount, totalcount]`` row to the CSV file.

     The target path is read from ``config().clicksaveexecl`` on every call.

     Args:
         spm: Value written to the first column.
         maxclickcount: Value written to the second column.
         totalcount: Value written to the third column.
     """
     filename = config().clicksaveexecl
     # The context manager closes the file even if writing the row raises.
     with open(filename, "a+", newline='') as csvFile:
         writer = csv.writer(csvFile)
         writer.writerow([spm, maxclickcount, totalcount])