def youkuRelatedVideos():
    """Scrape up to 6 related videos for the global searchStrAdd from
    soku.com and store them as JSON in
    jimi_radar_video.related_video_youku for the current global id.

    Relies on module-level globals: searchStrAdd, id, ScrabHelper,
    SqlHelper, json.
    """
    limit = 6  # keep at most this many related videos
    url = 'http://www.soku.com/search_video/q_' + searchStrAdd
    soup = ScrabHelper.getSoupFromURL(url, {}, 'utf8')
    links = soup.find_all('div', 'sk-vlist')[0].find_all(
        'div', 'v-link', limit=limit)
    resArr = []
    # Iterate only the links actually found: find_all(limit=...) caps the
    # maximum but may return fewer, so indexing range(limit) could raise
    # IndexError on sparse search results.
    for link in links:
        anchor = link.find('a')
        resArr.append({'vid': anchor.attrs['_log_vid'],
                       'title': anchor.attrs['title']})
    resJson = {'data': resArr}
    # NOTE(review): values are spliced into the SQL text, so a title
    # containing a quote breaks/injects the statement - confirm whether
    # SqlHelper supports parameter binding.
    sql = "update jimi_radar_video set related_video_youku = '%s' where id=%d" % (
        json.dumps(resJson, ensure_ascii=False, encoding='UTF-8'), id)
    SqlHelper.ExecuteNonQuery(sql)
def countKeywordOccurrences(resultsAll, keywords):
    """Count occurrences of each keyword across all result rows.

    :param resultsAll: iterable of rows; row[0] is the text to scan.
    :param keywords: iterable of keyword strings.
    :return: dict mapping utf8-encoded keyword -> total occurrence
        count; keywords that never match are omitted.
    """
    resultDictionary = {}
    for row in resultsAll:
        resultHTML = row[0].encode('utf8')
        for word in keywords:
            wordEncode = word.encode('utf8')
            # str.count replaces the original re.findall on the raw
            # keyword: identical non-overlapping counts for plain words,
            # and safe when a keyword contains regex metacharacters
            # (which previously miscounted or raised re.error).
            matchLength = resultHTML.count(wordEncode)
            if matchLength != 0:
                resultDictionary[wordEncode] = (
                    resultDictionary.get(wordEncode, 0) + matchLength)
    return resultDictionary


def analysisAndSaveOnce(resultsAll, dict, fieldName):
    """Tally keyword hits in resultsAll and store the tally as JSON in
    the given jimi_radar_dimensionmode column (row: global dmId).

    :param resultsAll: DB result rows; row[0] holds the scraped text.
    :param dict: keyword list (name kept for caller compatibility even
        though it shadows the builtin).
    :param fieldName: column of jimi_radar_dimensionmode to update.
    """
    resultDictionary = countKeywordOccurrences(resultsAll, dict)
    jsonStr = json.dumps(resultDictionary, ensure_ascii=False,
                         encoding='UTF-8')
    sql = "update jimi_radar_dimensionmode set %s ='%s' where id=%d" % (
        fieldName, jsonStr, dmId)
    insertNum = SqlHelper.ExecuteNonQuery(sql)
    print(jsonStr)
    print(sql)
    print(insertNum)
def doSave(json):
    """Insert one scrape result row into jimi_radar_result.

    :param json: dict-like message (the name shadows the json module
        inside this function) with keys scrabId (int), processed_clue
        (str), scrab_result and data_time.
    :return: the row count reported by SqlHelper.ExecuteNonQuery
        (previously computed but discarded).
    """
    scrabId = json.get('scrabId')  # int
    processed_clue = json.get('processed_clue')  # str
    scrab_result = json.get('scrab_result')
    data_time = json.get('data_time')
    # NOTE(review): values are spliced directly into the SQL string, so
    # a quote inside scrab_result breaks/injects the statement - confirm
    # whether SqlHelper offers parameter binding.
    rowCount = SqlHelper.ExecuteNonQuery(
        "insert into jimi_radar_result (scrab_id,processed_clue,scrab_result,data_time,insert_time) values('%d','%s','%s','%s','%s')" % (
            scrabId, processed_clue, scrab_result, data_time,
            DateHelper.getDateNowStr()))
    return rowCount
def jd(productId='256035'):
    """Fetch JD.com hot-comment tag statistics for a product and store
    them as JSON in jimi_radar_evaluate.jdyinxiang (row: global id).

    :param productId: JD product id to query; the default preserves the
        previously hard-coded product, so existing callers are
        unaffected.
    """
    url = ('https://sclub.jd.com/comment/productPageComments.action'
           '?productId=%s&score=0&sortType=3&page=0&pageSize=10'
           '&isShadowSku=0&callback=fetchJSON_comment98vv3934' % productId)
    jsonp = ScrabHelper.getHTMLFromURL(url, {}, 'gbk')
    loadJson = ScrabHelper.loads_jsonp(jsonp)
    hotCommentTagStatistics = loadJson['hotCommentTagStatistics']
    # tag name -> number of comments carrying that tag
    resArr = {}
    for stat in hotCommentTagStatistics:
        resArr[stat['name']] = stat['count']
    resJson = {'data': resArr}
    # NOTE(review): string-built SQL; a quote inside a tag name would
    # break/inject the statement - confirm SqlHelper binding support.
    sql = "update jimi_radar_evaluate set jdyinxiang = '%s' where id=%d" % (
        json.dumps(resJson, ensure_ascii=False, encoding='UTF-8'), id)
    print(sql)
    SqlHelper.ExecuteNonQuery(sql)
def weiboYinXiang():
    """Scrape Weibo "impression" tag labels for the global searchStr and
    save them, weighted by section size (5 - label count), into
    jimi_radar_evaluate.weiboyinxiang for the current global id."""
    url = ('http://s.weibo.com/impress?key=' + searchStr +
           '&cate=whole&isswitch=1&refer=tag&cuid=3235723984')
    soup = ScrabHelper.getSoupFromURL(url, {}, 'utf8')
    sections = soup.find_all('div', 'impress_label')[0].find_all('section')
    resArr = {}
    for section in sections:
        anchors = section.find_all('a')
        weight = 5 - len(anchors)
        for anchor in anchors:
            resArr[anchor.get_text()] = weight
    resJson = {'data': resArr}
    sql = "update jimi_radar_evaluate set weiboyinxiang = '%s' where id=%d" % (
        json.dumps(resJson, ensure_ascii=False, encoding='UTF-8'), id)
    print(sql)
    SqlHelper.ExecuteNonQuery(sql)
def weixinYear1():
    """Count Sogou Weixin search results for the global searchStr over
    the past 365 days and store the figure in
    jimi_radar_index.weixin_year1 for the current global id."""
    now = datetime.datetime.now()
    yearAgo = now - datetime.timedelta(days=365)
    # Sogou's ft/et filters take bare ISO dates (YYYY-MM-DD).
    dateNow = str(now).split(' ')[0]
    dateYearAgo = str(yearAgo).split(' ')[0]
    url = ('http://weixin.sogou.com/weixin?type=2&ie=utf8&query=' + searchStr +
           '&tsn=5&ft=' + dateYearAgo + '&et=' + dateNow +
           '&interation=null&wxid=&usip=null&from=tool')
    soup = ScrabHelper.getSoupFromURL(url, {}, 'utf8')
    text = soup.find_all('div', 'mun')[0].get_text()
    # Text looks like "found about 403,000 results"; grab the first
    # comma-grouped digit run and strip the commas.
    pattern = re.compile(r'[\d,]+')
    num = numMinusComma(pattern.findall(text)[0])
    sql = "update jimi_radar_index set %s = '%s' where id=%d" % (
        'weixin_year1', num, id)
    SqlHelper.ExecuteNonQuery(sql)
def baiduYear1():
    """Count Baidu search results for the global searchStr restricted to
    the last 365 days (via the gpc/stf time filter) and store the figure
    in jimi_radar_index.baidu_year1 for the current global id."""
    now = datetime.datetime.now()
    yearAgo = now - datetime.timedelta(days=365)
    intNow = str(DateHelper.getDateInt(now))
    yearAgoNow = str(DateHelper.getDateInt(yearAgo))
    print(intNow)
    print(yearAgoNow)
    # Baidu's time filter: gpc=stf=<from>,<to>|stftype=1 (URL-encoded).
    stf = urllib.quote('stf=' + yearAgoNow + ',' + intNow + '|stftype=1')
    url = "http://www.baidu.com/s?wd=" + searchStr + "&gpc=" + stf
    soup = ScrabHelper.getSoupFromHtml(requests.get(url).text)
    text = soup.find_all('div', 'nums')[0].get_text()
    # Text looks like "Baidu found about 403,000 results"; grab the
    # first comma-grouped digit run and strip the commas.
    pattern = re.compile(r'[\d,]+')
    num = numMinusComma(pattern.findall(text)[0])
    sql = "update jimi_radar_index set %s = '%s' where id=%d" % (
        'baidu_year1', num, id)
    SqlHelper.ExecuteNonQuery(sql)
def tiaomao():
    """Scrape Tmall tag clouds for a fixed item and update two columns
    of jimi_radar_evaluate (row: global id): tianmaoyinxiang (overall
    impression tag -> count) and tianmaoAge (buyer age scale -> count).

    The large inline sample of the listTagClouds response that used to
    live here as commented-out code has been removed; fetch the URL to
    inspect the payload shape.
    """
    url = ('https://rate.tmall.com/listTagClouds.htm?itemId=43165859354'
           '&isAll=true&isInner=true&t=1482481000827&callback=jsonp1575')
    jsonp = ScrabHelper.getHTMLFromURL(url, {}, 'gbk')
    loadjson = ScrabHelper.loads_jsonp(jsonp)
    # Overall impression tags: tag text -> occurrence count.
    tagCounts = {}
    for tagCloud in loadjson['tags']['tagClouds']:
        tagCounts[tagCloud['tag']] = tagCloud['count']
    _saveTmallEvaluateJson('tianmaoyinxiang', tagCounts)
    # Buyer age distribution: the first userTagCloudList entry is the
    # age dimension; scale label -> buyer count. (Presumably index 0 is
    # always age - TODO confirm against a live response.)
    ageCounts = {}
    for tagScale in loadjson['tags']['userTagCloudList'][0]['tagScaleList']:
        ageCounts[tagScale['scale']] = tagScale['count']
    _saveTmallEvaluateJson('tianmaoAge', ageCounts)


def _saveTmallEvaluateJson(fieldName, resArr):
    """Serialize {'data': resArr} and write it to the given
    jimi_radar_evaluate column for the current global id.

    Shared by the two update statements that were previously duplicated
    inline in tiaomao()."""
    resJson = {'data': resArr}
    # NOTE(review): string-built SQL; a quote inside a tag would
    # break/inject the statement - confirm SqlHelper binding support.
    sql = "update jimi_radar_evaluate set %s = '%s' where id=%d" % (
        fieldName,
        json.dumps(resJson, ensure_ascii=False, encoding='UTF-8'), id)
    print(sql)
    SqlHelper.ExecuteNonQuery(sql)
def doSave(numName, num, playName, play):
    """Update one jimi_radar_video row (identified by the global id),
    setting column numName to num and column playName to play."""
    template = "update jimi_radar_video set %s = '%s' , %s = '%s' where id=%d"
    statement = template % (numName, num, playName, play, id)
    print(statement)
    SqlHelper.ExecuteNonQuery(statement)