import urllib
import datetime
import re
import requests
from sqlHelper import SqlHelper
from scrabHelper import ScrabHelper
from dateHelper import DateHelper
from selenium import webdriver
import time

# NOTE(review): this module holds non-ASCII string literals; under Python 2
# that requires a PEP 263 coding declaration on line 1 or 2 of the file.
# None is visible in this part of the source -- confirm one exists.

# Value used for the dm_id column in the queries below.
dmId = 1
# Candidate spellings of the search keyword; only the first entry is used.
searchArr = ['小黑瓶', '兰蔻小黑瓶', '兰蔻 小黑瓶']
searchStr = searchArr[0]

# Ensure jimi_radar_index has a row for (dmId, searchStr), then read its id.
# NOTE(review): the statements are assembled with % interpolation. The values
# are hard-coded here, but a keyword containing a single quote would break
# the SQL -- check whether SqlHelper accepts bound parameters.
count = SqlHelper.ExecuteScalar(
    "select count(*) from jimi_radar_index where dm_id=%d and keyword='%s'"
    % (dmId, searchStr))
if count == 0:
    # NOTE(review): the insert is issued through ExecuteScalar and its result
    # is discarded; confirm that helper is meant for non-query statements.
    SqlHelper.ExecuteScalar(
        "insert into jimi_radar_index (dm_id,keyword,ctime) values (%d,'%s','%s')"
        % (dmId, searchStr, DateHelper.getDateNowStr()))
# `id` shadows the builtin; the name is kept because code outside this block
# may read it.
id = SqlHelper.ExecuteScalar(
    "select id from jimi_radar_index where dm_id=%d and keyword='%s'"
    % (dmId, searchStr))


def numMinusComma(num):
    """Return ``num`` with every ',' removed, e.g. '1,234' -> '1234'.

    ``num`` must be a string; the result is a new string of the same type.
    """
    return num.replace(',', '')
# NOTE(review): the statements down to `print insertNum` appear to be the tail
# of a definition whose header is outside the visible source --
# `resultDictionary`, `wordEncode`, `matchLength` and `fieldName` are not
# defined anywhere in view. The original indentation was lost, so the nesting
# of these lines must be restored against the full file.
# NOTE(review): `json` is used here but is not among the imports visible at
# the top of the file -- confirm it is imported.
resultDictionary[wordEncode] += matchLength
# Serialise the accumulated counts and store them in column `fieldName` of the
# jimi_radar_dimensionmode row whose id is dmId.
jsonStr = json.dumps(resultDictionary, ensure_ascii=False, encoding='UTF-8')
sql = "update jimi_radar_dimensionmode set %s ='%s' where id=%d" % (
    fieldName, jsonStr, dmId)
insertNum = SqlHelper.ExecuteNonQuery(sql)
# Debug output: the stored JSON, the executed statement and the value returned
# by ExecuteNonQuery.
print jsonStr
print sql
print insertNum

# Find out which websites the current dm has been scraped from.
scrab_json = SqlHelper.ExecuteScalar(
    "select scrab_json from jimi_radar_dimensionmode where id=" + str(dmId))
# Example of the stored value:
# {"data": [{"scrabId": 1,
#            "clue": 'http://cosme.pclady.com.cn/product/29669.html'
#           }, {
#            "scrabId": 2,
#            "clue": 'http://product.kimiss.com/product/80696/'
#           }]}
scrab_json = json.loads(scrab_json)
scrabArray = scrab_json['data']

# Fetch the dictionary rows: cate_json of every jimi_radar_dict_cate row with
# id < 10, ordered by id.
dtDict = SqlHelper.ExecuteDataTable(
    "select cate_json from jimi_radar_dict_cate where id<10 order by id")
# Dictionary keyword array; sample cate_json payload:
# {"data":["浓稠", "粘稠", "有点稠", "稠稠", "粘粘", "黏腻", "黏黏", "厚实", "厚重", "比较厚", "丰盈"]}
dictKeyWordArray = []