Beispiel #1
0
def get_brands():
    """Export the brand names of every product table to ``brands.txt``.

    Each non-empty ``brand`` value is stripped and written to
    ``file_path + 'train_files/brands.txt'``, one name per line. A value
    shaped like ``Name(Alias)`` contributes two lines: the text before the
    first ``(`` and the text between that ``(`` and the following ``)``.
    Afterwards the file is de-duplicated via ``file_util.del_duplicate`` and
    merged into the dictionary file via ``add_to_dictionary``.

    Errors raised while querying or writing are printed and end the export
    early (best effort); the post-processing steps still run.
    """
    tables = [
        'product_lipstick', 'product_eye', 'product_perfume',
        'product_baseMakeup', 'product_other_perfume'
    ]
    with open(file_path + 'train_files/brands.txt', "w",
              encoding='utf-8') as file:
        try:
            for table_name in tables:
                sql = 'select brand from ' + table_name
                # NOTE(review): the result is iterated as a sequence of row
                # tuples here, while other functions in this file treat the
                # return value as (status, rows) -- confirm the contract.
                result = list(database_util.search_sql(sql, None))
                for row in result:
                    # Skip NULL as well as empty brands; len(None) would
                    # raise and abort the whole export.
                    if not row[0]:
                        continue
                    line = row[0].strip()
                    if line.find('(') >= 0:
                        parts = line.split('(')
                        brand1 = parts[0]
                        brand2 = parts[1].split(')')[0]
                    else:
                        # No alias in parentheses: the whole value is the
                        # brand. Resetting both names per row keeps values
                        # from the previous row from being written again.
                        brand1 = line
                        brand2 = ''
                    for brand in (brand1, brand2):
                        if brand:
                            file.write(brand + '\n')
                    print(brand1, brand2)
        except Exception as e:
            print(e)
    file_util.del_duplicate('train_files/brands.txt')
    add_to_dictionary('train_files/brands.txt', 'train_files/dictionary.txt')
def get_product_info(sku):
    """Look up one product by SKU and return its display fields.

    Queries the module-level ``table`` joined with ``shop``.

    Args:
        sku: value bound to the ``a.sku=%s`` placeholder.

    Returns:
        A dict with the keys ``name``, ``price``, ``img``, ``address``,
        ``rate``, ``comment``, ``description``, ``shop``, ``follow``,
        ``sku``, ``avg_price``, ``sentiment`` and ``max_price``. The dict is
        empty when the query reports failure (status ``-1``) or matches no
        row.
    """
    result = {}
    sql = 'select a.name,a.price,a.img,a.url,a.rate,a.comment,a.description,b.shop_name,b.follow,a.sku,a.avg_price,a.sentiment,a.max_price from ' + table + ' a,shop b where a.shop_id=b.shop_id  and a.sku=%s'

    sql_result = database_util.search_sql(sql, [sku])
    if sql_result[0] == -1:
        return result

    rows = sql_result[1]
    # An unknown SKU yields no rows; indexing rows[0] would raise IndexError.
    if not rows:
        return result

    (name, price, img, url, rate, comment, description, shop_name, follow,
     sku_value, avg_price, sentiment, max_price) = rows[0]

    # Counts above 10000 are shown as multiples of 万 (10,000).
    if comment > 10000:
        comment = str(float(comment) / 10000) + '万+'
    if follow > 10000:
        follow = str(float(follow) / 10000) + '万'

    result["name"] = name
    result["price"] = round(float(price), 1)
    result["img"] = img
    result["address"] = url
    # The stored rate is a fraction; present it as a percentage string.
    result["rate"] = str(round(rate * 100, 2)) + '%'
    result["comment"] = comment
    result["description"] = description
    result["shop"] = shop_name
    result["follow"] = follow
    result["sku"] = sku_value
    result["avg_price"] = round(float(avg_price), 1)
    result["sentiment"] = int(sentiment)
    result["max_price"] = round(float(max_price), 1)

    print(result)
    return result
Beispiel #3
0
def get_descriptions():
    """Export segmented product descriptions and their new words.

    For every non-empty ``description`` in the product tables:

    * the stripped text, segmented with ``jieba.cut`` and joined by spaces,
      is written as one line of ``train_files/descriptions.txt``;
    * the text is then cleaned with a punctuation/digit regex, segmented
      again, and every token found in neither the stop-word list nor the
      dictionary is written on its own line of
      ``train_files/description_words.txt`` (and echoed to stdout).

    Both files are de-duplicated via ``file_util.del_duplicate`` at the end.
    Errors raised while querying or writing are printed and end the export
    early (best effort); the de-duplication still runs.
    """
    tables = [
        'product_lipstick', 'product_eye', 'product_perfume',
        'product_baseMakeup', 'product_other_perfume'
    ]
    # Compiled once instead of per description. A raw string keeps the
    # pattern equivalent while avoiding invalid-escape warnings.
    # NOTE(review): inside the first character class, ":-【" is parsed by
    # `re` as a range (U+003A..U+3010), which also removes ASCII letters.
    # Left unchanged here -- confirm whether a literal "-" was intended.
    noise_pattern = re.compile(
        r"[0-9\s+\.\!\/_,$%^*()?;;:-【】+\"\']+|[+——!,;:。?、~@#¥%……&*()]+")

    file_name = file_path + 'train_files/descriptions.txt'
    words_name = file_path + 'train_files/description_words.txt'
    # The context manager closes both files even if the jieba / word-list
    # setup below raises before the export loop starts.
    with open(file_name, "w", encoding='utf-8') as file, \
            open(words_name, "w", encoding='utf-8') as words_file:
        jieba.load_userdict(file_path + 'train_files/dictionary.txt')
        # Register the stop-word list with jieba.analyse.
        jieba.analyse.set_stop_words(file_path +
                                     'train_files/stop_words.txt')
        stop_words = get_words('train_files/stop_words.txt')
        dictionary = get_words('train_files/dictionary.txt')

        try:
            for table_name in tables:
                sql = 'select description from ' + table_name
                result = list(database_util.search_sql(sql, None))
                for description in result:
                    # Skip NULL as well as empty descriptions; len(None)
                    # would raise and abort the whole export.
                    if not description[0]:
                        continue
                    line = description[0].strip()
                    file.write(' '.join(jieba.cut(line)) + '\n')

                    # Segment the cleaned text and keep only tokens that are
                    # neither stop words nor already in the dictionary.
                    cleaned = noise_pattern.sub("", line)
                    new_words = [
                        word for word in jieba.cut(cleaned)
                        if word not in stop_words and word not in dictionary
                    ]
                    words_file.writelines(word + '\n' for word in new_words)
                    print(''.join(word + ' ' for word in new_words))

        except Exception as e:
            print("fail in get_description,err:" + str(e))
    file_util.del_duplicate('train_files/descriptions.txt')
    file_util.del_duplicate('train_files/description_words.txt')
def get_sql(keywords, price1, price2, type):
    """Run the product search query with the optional filters applied.

    Args:
        keywords: full-text search terms matched against ``a.description``;
            ``''`` disables the filter.
        price1: lower price bound (converted with ``int``); ``''`` disables
            the filter.
        price2: upper price bound (converted with ``int``); ``''`` disables
            the filter.
        type: when equal to ``'sale_products'``, only rows priced below
            their ``avg_price`` are selected.

    Returns:
        Whatever ``database_util.search_sql`` returns for the assembled
        statement and its bound parameters.
    """
    fragments = [
        'select a.name,a.price,a.img,a.url,a.rate,a.comment,a.description,b.shop_name,b.follow,a.sku,a.sentiment,a.brand_hot,a.avg_price from ' + table + ' a,shop b where a.shop_id=b.shop_id'
    ]
    params = []

    # (raw value, SQL clause, converter applied before binding)
    optional_filters = (
        (keywords,
         ' and match(a.description) against(%s in natural language mode) ',
         None),
        (price1, ' and price>=%s ', int),
        (price2, ' and price<=%s ', int),
    )
    for value, clause, convert in optional_filters:
        if value != '':
            fragments.append(clause)
            params.append(value if convert is None else convert(value))

    if type == 'sale_products':
        fragments.append(' and price<avg_price')

    return database_util.search_sql(''.join(fragments), params)
def _format_count(value, suffix):
    """Render counts above 10000 as a multiple of 万 (10,000) plus suffix."""
    if value > 10000:
        return str(float(value) / 10000) + suffix
    return value


def _load_ranking_weights(user):
    """Read the user's ranking weights for the current product table.

    Returns:
        ``(weight, factors)``. ``weight`` holds the raw stored values for
        the response (empty when no row is available). ``factors`` holds the
        normalised weights and the two score divisors used for ranking; when
        the lookup fails, returns no row, or the stored ``sum`` is zero, the
        weights stay at 0.0, so every product scores 0.0 instead of the
        ranking raising. A zero divisor is replaced by 1.0 for the same
        reason.
    """
    sql = 'select rate,follow,comment,sentiment,brand_hot,sum,comment_score,hot_score from weight where user=%s and kind=%s'
    result = database_util.search_sql(sql, [user, table])

    weight = {}
    factors = {
        'rate': 0.0,
        'follow': 0.0,
        'comment': 0.0,
        'sentiment': 0.0,
        'brand': 0.0,
        'comment_score': 1.0,
        'hot_score': 1.0,
    }
    if result[0] == -1:
        return weight, factors
    rows = list(result[1])
    if not rows:
        return weight, factors

    row = rows[0]
    total = float(row[5])
    if total:
        for key, raw in zip(('rate', 'follow', 'comment', 'sentiment',
                             'brand'), row[:5]):
            factors[key] = float(raw) / total
    factors['comment_score'] = float(row[6]) or 1.0
    factors['hot_score'] = float(row[7]) or 1.0

    weight['rate'] = row[0]
    weight['follow'] = row[1]
    weight['comment'] = row[2]
    weight['sentiment'] = row[3]
    weight['brand_hot'] = row[4]
    return weight, factors


def handle_sql_result(sql_result, user, cur_page):
    """Score, sort and paginate a product query result for display.

    Args:
        sql_result: ``(status, rows)`` as returned by ``get_sql``; a status
            of ``-1`` is treated as "no rows". Each row is indexed by the
            column order of the ``get_sql`` select list.
        user: user whose row in the ``weight`` table drives the ranking.
        cur_page: 1-based page number (anything ``int()`` accepts); values
            below 1 are treated as page 1.

    Returns:
        A dict with ``data`` (up to 9 product dicts for the requested page,
        best score first), ``brands`` (up to 10 entries from the hot-brand
        query), ``weight`` (the user's raw weights, possibly empty) and
        ``page_no`` (total number of pages).
    """
    page_size = 9

    # Load the ranking parameters from the weight table.
    weight, factors = _load_ranking_weights(user)
    comment_score = factors['comment_score']
    hot_score = factors['hot_score']

    # Append a weighted score (index 13) to every result row.
    all_list = []
    if sql_result[0] != -1:
        for raw_row in list(sql_result[1]):
            row = list(raw_row)
            score = round(
                (float(row[4]) * factors['rate'] +
                 float(row[8]) * factors['follow'] / hot_score +
                 float(row[5]) * factors['comment'] / comment_score +
                 float(row[10]) * factors['sentiment'] +
                 float(row[11]) * factors['brand'] / hot_score), 2)
            row.append(score)
            all_list.append(row)

    # Best score first, then cut out the requested page.
    all_list.sort(key=itemgetter(13), reverse=True)
    page_no = (len(all_list) + page_size - 1) // page_size

    # A page below 1 would produce negative indexes and wrap around to the
    # end of the list, so clamp it to the first page.
    cur_page = max(int(cur_page), 1)
    start = (cur_page - 1) * page_size
    item = []
    for row in all_list[start:start + page_size]:
        item.append({
            "name": row[0],
            "price": str(row[1]),
            "img": row[2],
            "address": row[3],
            # The stored rate is a fraction; present it as a percentage.
            "rate": str(round(row[4] * 100, 2)) + '%',
            "comment": _format_count(row[5], '万+'),
            "description": row[6],
            "shop": row[7],
            "follow": _format_count(row[8], '万'),
            "sku": row[9],
            "score": row[13],
            "avg_price": row[12],
        })

    # Hot-brand ranking, ordered by shop follower count.
    brands = []
    brand_sql = 'select b.shop_name,a.brand ,b.follow from ' + table + ' a ,shop b where a.shop_id = b.shop_id group by brand order by follow desc limit 10;'
    brand_result = database_util.search_sql(brand_sql, None)
    if brand_result[0] != -1:
        for raw_row in list(brand_result[1]):
            row = list(raw_row)
            row[2] = _format_count(row[2], '万')
            brands.append({
                "brand": row[1],
                "shop": row[0],
                "follow": row[2],
                # NOTE(review): "num" carries the whole row list, as in the
                # original code; possibly a rank index was intended --
                # confirm against the consumer of this field.
                "num": row,
            })

    results = {}
    results['data'] = item
    results['brands'] = brands
    results['weight'] = weight
    results['page_no'] = page_no
    return results