def tb_data_pro(data):
    """Run the image comparison steps for one record and hand the result to ``test``.

    The ``image_url`` of every entry in ``data['images']`` is collected and
    passed to ``get_hash``.  The two values returned by ``mysql_gethash()``
    are then used together with those hashes and the module-level ``TH`` in a
    call to ``match``, and the outcome is forwarded to ``test``.

    ``data`` is only read here; nothing is returned.

    NOTE(review): a second, longer definition named ``tb_data_pro`` appears
    later in this source; if both live in the same module the later one
    replaces this one at import time.
    """
    urls = [entry['image_url'] for entry in data['images']]
    record_hashes = get_hash(urls)
    reference_hashes, reference_urls = mysql_gethash()
    matched = match(reference_hashes, record_hashes, TH)
    test(urls, reference_urls, matched)
def _sql_quote(value):
    """Render *value* as a single-quoted SQL string literal (``NULL`` for None).

    Non-string values are converted with ``str`` so that numbers can be
    written without raising ``TypeError``.  Backslashes and single quotes are
    backslash-escaped so that they cannot terminate or alter the literal.

    NOTE(review): this assumes the MySQL server runs with backslash escapes
    enabled (i.e. ``NO_BACKSLASH_ESCAPES`` is not set) -- TODO confirm.  A
    parameterized query would be preferable if ``mysql_insert`` can accept
    arguments.
    """
    if value is None:
        return "NULL"
    text = str(value)
    # Escape the backslash first so the backslashes added for the quotes
    # below are not escaped a second time.
    text = text.replace("\\", "\\\\").replace("'", "\\'")
    return "'" + text + "'"


def tb_data_pro(data):
    """Match one record's images and insert the record into ``taobao_shop``.

    Steps:

    1. Collect ``image_url`` from every entry of ``data['images']`` and pass
       the list to ``get_hash``.
    2. Call ``match`` with the first value returned by ``mysql_gethash()``,
       the hashes from step 1 and the module-level ``TH``; forward the result
       to ``test``.
    3. Store the results back into ``data``: ``vvic_id`` (the raw result of
       ``match``), ``hash_code`` (JSON text of the hashes) and ``images``
       (re-encoded as JSON text).
    4. Build an ``insert into taobao_shop(...) values(...)`` statement from
       the fields listed below and pass it to ``mysql_insert``.

    ``current_price`` is split on ``-`` into two columns, ``current_price``
    and ``current_price_max``; a value without ``-`` fills both columns.

    Args:
        data: mapping holding every key listed in ``fields`` except
            ``hash_code`` and ``vvic_id``, which are written here.  It is
            modified in place.

    Raises:
        KeyError: if ``data`` lacks one of the required keys.
    """
    img_urls = [image['image_url'] for image in data['images']]
    hashes = get_hash(img_urls)
    hash_vvic, images_url_vvic = mysql_gethash()
    vvic_id = match(hash_vvic, hashes, TH)
    test(img_urls, images_url_vvic, vvic_id)

    # The original assigned str(vvic_id) and then immediately overwrote it
    # with the raw value; only the surviving assignment is kept.  Conversion
    # to text now happens when the SQL literal is rendered.
    data['vvic_id'] = vvic_id
    data['hash_code'] = json.dumps(hashes)
    data['images'] = json.dumps(data['images'])

    # Same names as the keys of ``data``.
    fields = [
        'shop_name', 'product_id', 'name', 'current_price',
        'month_sales_count', 'stores_count', 'url', 'images', 'hash_code',
        'vvic_id', 'add_time'
    ]

    # Assemble the MySQL command.
    columns = []
    values = []
    for field in fields:
        if field != 'current_price':
            columns.append(field)
            values.append(_sql_quote(data[field]))
            continue

        price = data[field]
        if price is None:
            low = high = None
        else:
            parts = str(price).split('-')
            low = parts[0]
            # A single price is used as both the minimum and the maximum.
            high = parts[1] if len(parts) > 1 else parts[0]
        columns.extend(['current_price', 'current_price_max'])
        values.extend([_sql_quote(low), _sql_quote(high)])

    sql = ("insert into taobao_shop(" + ",".join(columns) + ")"
           + ' ' + "values(" + ",".join(values) + ")")
    mysql_insert(sql)
def vvic_data_pro(data):
    """Store the JSON-encoded result of ``get_hash`` under ``data['hashcode']``.

    ``data['images']`` is passed to ``get_hash`` unchanged; the returned value
    is serialised with ``json.dumps`` and written back into ``data`` in place.
    Nothing is returned.
    """
    data['hashcode'] = json.dumps(get_hash(data['images']))
import ast
import time

from imgHash import get_hash

# NOTE(review): ``json`` and ``pymysql`` are used below but are not imported
# in the visible part of this file -- confirm they are imported elsewhere.

T = 3600  # pause between two refresh passes, passed to time.sleep()


def _refresh_missing_hashcodes():
    """Fill in ``hashcode`` for ``vvic_all_test`` rows where it is '[]' or NULL.

    Opens a fresh connection, selects the affected rows, computes
    ``get_hash`` over each row's image URLs (prefixed with ``https:``) and
    writes the JSON-encoded result back.  The cursor and the connection are
    always closed, even when a row fails.
    """
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from configuration or the environment.
    db = pymysql.connect(host='localhost',
                         port=3306,
                         user='******',
                         passwd='Shalou-2018',
                         db='vvic',
                         charset='utf8')
    try:
        cursor = db.cursor()
        try:
            cursor.execute(
                "SELECT id,images FROM vvic_all_test "
                "where hashcode = '[]' or hashcode is null")
            for row_id, images in cursor.fetchall():
                if not images:
                    # Nothing to hash; the row stays selected for later passes.
                    continue
                # SECURITY: the original called eval() on this column, which
                # executes arbitrary expressions stored in the database.
                # ast.literal_eval only accepts Python literals.
                # NOTE(review): assumes the column holds a literal list of
                # URL strings -- confirm against the writer of this table.
                urls = ['https:' + url for url in ast.literal_eval(images)]
                hashcode = json.dumps(get_hash(urls))
                # Let the driver quote the values instead of formatting them
                # into the statement text.
                cursor.execute(
                    "update vvic_all_test set vvic_all_test.hashcode = %s "
                    "where id = %s",
                    (hashcode, row_id))
                # Committed per row so finished rows survive a failure on a
                # later one.  NOTE(review): the original statement nesting
                # was lost in the flattened source; confirm per-row commit
                # is acceptable.
                db.commit()
        finally:
            cursor.close()
    finally:
        db.close()


while True:
    # Update the hashcode column of vvic_all_test.
    _refresh_missing_hashcodes()
    time.sleep(T)