def __init__(self, commentSummary, businessStatistics, TrafficStats,
                 customerAnalysis, customerPerfer, effectiveOrders,
                 todaystatistics, activitylist, customer_reminderInfo,
                 customer_reminder, history_consume, PeersCompareAnalysis,
                 getCouponLabel, shopid):
        """Store the supplied report sections, open the database handles and
        resolve the shop identity.

        Every argument is stored unchanged on the instance under the same
        name. Three ``DBUtils`` handles are then created, the current date,
        the previous day and the current time are recorded, and finally the
        shop name and the WMDS shop id are looked up through
        ``self.get_shopname`` / ``self.get_WMDS_shopid``.

        NOTE(review): hosts and passwords are hard-coded below; they are left
        untouched so existing deployments keep working, but they should be
        moved to configuration.
        """
        self.commentSummary = commentSummary
        self.businessStatistics = businessStatistics
        self.TrafficStats = TrafficStats
        self.customerAnalysis = customerAnalysis
        self.customerPerfer = customerPerfer
        self.effectiveOrders = effectiveOrders
        self.todaystatistics = todaystatistics
        self.activitylist = activitylist
        self.customer_reminderInfo = customer_reminderInfo
        self.customer_reminder = customer_reminder
        self.history_consume = history_consume
        self.PeersCompareAnalysis = PeersCompareAnalysis
        self.getCouponLabel = getCouponLabel
        self.shopid = shopid

        # presumably (host, port, user, password, database, charset) --
        # confirm against DBUtils.
        self.db = DBUtils(('116.62.70.68', 3306, 'bigdata',
                           'gisfood20171220@nj', 'big_data', 'utf8mb4'))
        self.db2 = DBUtils(('192.168.1.200', 3306, 'njjs_zsz', 'njjs1234',
                            'zszdata', 'utf8mb4'))
        self.db3 = DBUtils(('116.62.70.68', 3306, 'bigdata',
                            'gisfood20171220@nj', 'wmds', 'utf8mb4'))

        # Read the clock once so that date, yesterday and time always describe
        # the same instant, even when the constructor runs across midnight.
        now = datetime.datetime.now()
        self.date = now.strftime('%Y-%m-%d')
        self.yesterday = now.date() - datetime.timedelta(days=1)
        self.time = now.strftime('%H:%M:%S')
        self.platform = 2  # platform label
        # Looked up last; presumably these helpers rely on the handles and
        # attributes set above -- verify before reordering.
        self.shopname = self.get_shopname(self.shopid)
        self.WMDS_shopid = self.get_WMDS_shopid()
def run():
    """Parse one hard-coded ``rest_info.pickle`` dump with ParseDeliveryMode.

    Opens a ``DBUtils`` handle on the ``areadata`` database, builds the
    parser for the fixed city/date/area below and calls its ``parse()``.
    """
    connection = DBUtils(
        ('192.168.1.200', 3306, 'njjs', 'njjs1234', 'areadata', 'utf8mb4'))
    dump_file = "D:\\crawl_data\\店圈监控\\饿了么\\南京\\谢恒兴\\2017-11-29\\rest_info.pickle"
    job_args = (dump_file, '南京', '2017-11-29', '谢恒兴', connection)
    ParseDeliveryMode(*job_args).parse()
Beispiel #3
0
def rate_analyze(id, comment_shopid, date, comment_content):
    '''
    Segment one comment, classify it against the keyword table and store
    the result in ``comment_flag``.

    The comment is segmented with ``jieba.cut`` and each segment is compared
    with column 0 of the table returned by ``get_config()``; column 1 of the
    first matching row is the class. When several segments match, the last
    matching segment decides the stored keyword and class. When nothing
    matches (including a comment that yields no segments at all) the keyword
    is ``' '`` and the class is ``0``.

    NOTE(review): the database credentials are hard-coded here.

    :param id: comment ID
    :param comment_shopid: shop ID, written to the row unchanged
    :param date: current date, written to the row unchanged
    :param comment_content: text of the comment
    :return: None
    '''
    created_at = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    key_vaule = get_config()
    db2 = DBUtils(('116.62.70.68', 3306, 'bigdata', 'gisfood20171220@nj',
                   'big_data', 'utf8mb4'))
    sql2 = "insert into comment_flag VALUES"
    seg_list = jieba.cut(comment_content)  # every segment of the sentence
    print("评论内容:", comment_content)

    # Defaults for "no negative keyword found". They are set before the loop
    # so that a comment producing no segments no longer leaves comment_class
    # unbound at the print below.
    keywords = ' '
    comment_class = 0
    for item in seg_list:  # match each segment of the comment
        for j in range(len(key_vaule)):  # look the segment up in the table
            if key_vaule.iloc[j, 0] == item:
                print("有差评内容:", item)
                keywords = item
                comment_class = int(key_vaule.iloc[j, 1])
                break  # first matching row wins for this segment

    print("评论ID:" + str(id), "所属分类:" + str(comment_class))
    batch = BatchSql(sql2)  # write the row to the database
    batch.addBatch(
        [0, id, comment_shopid, date, comment_class, keywords, created_at,
         None])
    db2.update(batch)
 def __init__(self, city_short, date_short):
     """Remember the city/date abbreviations and open the database handle.

     :param city_short: abbreviated city name
     :param date_short: abbreviated save date
     """
     self.city_short, self.date_short = city_short, date_short
     connection_settings = ('192.168.1.200', 3306, 'njjs_test', 'njjs1234',
                            'datatest', 'utf8mb4')
     self.db = DBUtils(connection_settings)
Beispiel #5
0
def get_rating_info(date):
    '''
    Read the non-empty comments created on one day in a single query.

    :param date: date prefix; a trailing ``%`` is appended and the result is
                 matched against ``created_at`` with ``LIKE``
    :return: whatever ``queryForList`` returns for the columns
             (id, shop_id, rate_content)
    '''
    connection = DBUtils(('116.62.70.68', 3306, 'bigdata',
                          'gisfood20171220@nj', 'compass_prod', 'utf8mb4'))
    query = "select t1.id,t1.shop_id,t1.rate_content from order_comment t1 where t1.created_at like %s and t1.rate_content != ''"
    day_pattern = date + '%'
    return connection.queryForList(query, [day_pattern])
Beispiel #6
0
def parse(save_path, city, rest_area, date):
    """Load the pickle dumps found in *save_path* into the import tables.

    The parsers run in a fixed order: categories, hot words, menus, rating
    tags, scores, delivery modes. The category parser runs first because its
    ``err_rest_ids`` attribute is handed to the four parsers that follow.
    When all parsers are done, the stored procedure ``deal_import_table`` is
    called on the same database handle.

    :param save_path: directory that contains the ``*.pickle`` dumps
    :param city: passed through to every parser
    :param rest_area: passed through to every parser
    :param date: passed through to every parser
    """
    connection = DBUtils(
        ('192.168.1.200', 3306, 'njjs', 'njjs1234', 'areadata', 'utf8mb4'))

    # Resolve every dump location once; the key is the file stem.
    dumps = {
        stem: os.path.join(save_path, stem + '.pickle')
        for stem in ('category', 'hot_word', 'menu', 'rating_tag', 'score',
                     'rest_info')
    }

    # Shop categories.
    category_job = ParseCategoryObject.ParseCategoryObject(
        dumps['category'], city, date, rest_area, connection)
    category_job.parse()
    skipped_ids = category_job.err_rest_ids
    print(len(skipped_ids))

    # Hot search words.
    hot_word_job = ParseHotWordObject.ParseHotWordObject(
        dumps['hot_word'], city, date, rest_area, skipped_ids, connection)
    hot_word_job.parse()

    # Menu items.
    menu_job = ParseMenuObject.ParseMenuObject(
        dumps['menu'], city, date, rest_area, skipped_ids, connection)
    menu_job.parse()

    # Rating tags.
    rating_tag_job = ParseRatingTagObject.ParseRatingTagObject(
        dumps['rating_tag'], city, date, rest_area, skipped_ids, connection)
    rating_tag_job.parse()

    # Shop scores.
    score_job = ParseScoreObject.ParseScoreObject(
        dumps['score'], city, date, rest_area, skipped_ids, connection)
    score_job.parse()

    # The import of 'rating.pickle' through ParseRatingObject is disabled
    # in this function.

    # Shop delivery modes.
    delivery_job = ParseDeliveryMode.ParseDeliveryMode(
        dumps['rest_info'], city, date, rest_area, connection)
    delivery_job.parse()
    print('解析数据结束')

    print('调用存储过程,把import表中数据导入到业务表中')
    connection.callProcedure('deal_import_table')
    def getForRestIds(self):
        """Yield the distinct ``rest_id`` values of this city/area, page by page.

        The query filters ``t_e_rest_list_area`` on the date returned by
        ``getTodayLater(1)``, on ``self.city`` and on ``self.rest_area``. Rows
        are fetched 10000 at a time through ``queryForListBylimit`` until a
        page comes back empty.

        NOTE(review): the filter values are interpolated into the SQL text;
        whether ``queryForListBylimit`` can bind parameters is not visible
        here. The query has no ORDER BY, so page contents rely on the server
        returning rows in a stable order -- confirm.

        :return: generator of ``rest_id`` values (first column of each row)
        """
        config = ('192.168.0.200', 3306, 'njjs', 'njjs1234', 'areadata',
                  'utf8mb4')
        db = DBUtils(config)
        date = getTodayLater(1)

        sql = "select DISTINCT t1.rest_id from t_e_rest_list_area t1 where t1.date = '%s' and t1.city = '%s' and t1.rest_area='%s'" % (
            date, self.city, self.rest_area)

        page_size = 10000
        offset = 0
        data = db.queryForListBylimit(sql, offset, page_size)
        while data:
            for item in data:
                yield item[0]
            # Advance to the next page. The offset used to stay at 10000
            # forever, re-reading the second page without end.
            offset += page_size
            data = db.queryForListBylimit(sql, offset, page_size)
Beispiel #8
0
def Parser():
    '''
    Import the crawled data of every configured city into the database.

    The job list comes from ``get_toDB_config(config="config.ini")``; each
    entry is a 6-tuple (city_path, city_py, city_short, city_cn, save_date,
    save_date_short). For every entry the menu table is created first, then
    the category, menu, rating, score, hot-word and delivery-mode parsers run
    in that order. The category parser's return value (the failed shop ids)
    is passed to the parsers that follow it. The JVM is shut down at the end.

    An empty job list is now simply skipped: the unused ``result[0][5]``
    lookup that raised ``IndexError`` in that case has been removed.

    :return: None
    '''
    db = DBUtils(('192.168.1.200', 3306, 'njjs_test', 'njjs1234', 'datatest',
                  'utf8mb4'))
    result = get_toDB_config(config="config.ini")
    print(result)

    for city_path, city_py, city_short, city_cn, save_date, save_date_short in result:

        print("创建各城市的菜品类数据库>>>")
        create_menu = sql_util(city_short, save_date_short)
        create_menu.create_menu_table()

        parsecategory = ParseCategory(city_path, city_py, city_cn, save_date,
                                      save_date_short, db)
        err_rest_ids = parsecategory.run()

        parsemenu = ParseMenu(city_path, city_py, save_date, city_short,
                              save_date_short, err_rest_ids, db)
        parsemenu.run()

        parseratingtag = ParseRating(city_path, city_py, save_date,
                                     save_date_short, err_rest_ids, db)
        parseratingtag.run()

        parsescore = ParseScore(city_path, city_py, save_date, save_date_short,
                                err_rest_ids, db)
        parsescore.run()

        parsehotword = ParseHotword(city_path, city_py, save_date,
                                    save_date_short, err_rest_ids, db)
        parsehotword.run()

        parsedeliverymode = ParseDeliveymode(city_path, city_py, save_date,
                                             save_date_short, db)
        parsedeliverymode.run()

    print("最后关闭JVM")
    jpype.shutdownJVM()
def UpdateRestList(date_short):
    """Build the final city shop-list table and drop its two source tables.

    ``t_e_rest_list_city_<date_short>`` is created from
    ``t_e_rest_list_city_pre_<date_short>`` left-joined with
    ``t_e_delivery_mode_city_pre_<date_short>`` on (city, date, rest_id).
    After a one-second pause both ``_pre_`` tables are dropped.

    :param date_short: suffix inserted into the three table names
    """
    connection = DBUtils(('192.168.1.200', 3306, 'njjs_test', 'njjs1234',
                          'datatest', 'utf8mb4'))

    create_stmt = """
    CREATE TABLE t_e_rest_list_city_%s as
    SELECT
	a.city,
	a.date,
	a.rest_id,
	a.rest_name,
	a.phone,
	a.address,
	a.avg_cost,
	b.delivery_id,
	a.delivery_fee,
	a.min_delivery_price,
	a.is_new,
	a.is_premium,
	a.latitude,
	a.longitude,
	a.order_month_sales,
	a.area_id
    FROM
        t_e_rest_list_city_pre_%s a
    LEFT JOIN t_e_delivery_mode_city_pre_%s b ON a.city = b.city
    AND a.date = b.date
    AND a.rest_id = b.rest_id;
    """ % ((date_short,) * 3)

    drop_stmt = """
    DROP TABLE t_e_rest_list_city_pre_%s;
    DROP TABLE t_e_delivery_mode_city_pre_%s;
    """ % ((date_short,) * 2)

    # Step 1: create the merged table.
    print("更新CityRestList表>>>", create_stmt)
    connection.deal_sql(create_stmt)

    # Step 2: after a short pause, remove the two source tables.
    time.sleep(1)
    print("删除两张旧表>>>", drop_stmt)
    connection.deal_sql(drop_stmt)
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author: Ryan
# @Time: 2018/1/26 下午4:32
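'''
Revenue algorithm 1.0: weights the money-off (spend-threshold discount) amounts
to estimate a merchant's expected revenue for one month. The results are not
very good, possibly because there is too little data.
'''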
from util.DB.DAO import DBUtils, BatchSql
# Module-level database handle, created when the module is imported and used
# by get_shop_money_off() below. The tuple is presumably
# (host, port, user, password, database, charset) -- confirm against DBUtils.
db = DBUtils(
    ('192.168.1.200', 3306, 'njjs', 'njjs1234', 'exdata_2018', 'utf8mb4'))


def get_shop_money_off(shopid):
    '''
    Fetch the money-off (spend-threshold discount) tiers of one shop.

    The shop id is now passed to ``queryForList`` as a bound parameter
    instead of being formatted into the SQL text, so a crafted value can no
    longer change the statement.

    :param shopid: value matched against ``rest_id``
    :return: dict mapping each ``full_price`` to its ``sub_price``; rows are
             read in descending ``full_price`` order
    '''
    sql = '''
          SELECT t1.rest_id,t1.full_price price,t1.sub_price
          FROM t_e_rest_money_off_city_1801 t1
          WHERE t1.rest_id = %s
          ORDER BY price DESC
          '''
    shop_money_off = db.queryForList(sql, [shopid])

    # Row layout is (rest_id, full_price, sub_price).
    return {item[1]: item[2] for item in shop_money_off}

Beispiel #11
0
def run():
    """Parse one hard-coded ``score.pickle`` dump with ParseScoreObject.

    Uses a ``DBUtils`` handle created with its default arguments.
    """
    connection = DBUtils()
    dump_file = "F:\\crawler_data\\数据监控\\饿了么\\谢恒兴监控\\2017-10-28\\score.pickle"
    job_args = (dump_file, '南京', '2017-10-28', '谢恒兴', connection)
    ParseScoreObject(*job_args).parse()
def run():
    """Parse one hard-coded ``hot_word.pickle`` dump with ParseHotWordObject.

    Uses a ``DBUtils`` handle created with its default arguments.
    """
    connection = DBUtils()
    dump_file = "F:\\crawler_data\\数据监控\\饿了么\\谢恒兴监控\\2017-10-28\\hot_word.pickle"
    job_args = (dump_file, '南京', '2017-10-28', '谢恒兴', connection)
    ParseHotWordObject(*job_args).parse()
Beispiel #13
0
from SeleniumBackstage.meituan_web.Clientlogin import Clientlogin
from apscheduler.schedulers.blocking import BlockingScheduler
import time
import datetime
import traceback
from util.DB.DAO import DBUtils, BatchSql

# Module-level database handle on the 'big_data' database, created at import.
db = DBUtils(('116.62.70.68', 3306, 'bigdata', 'gisfood20171220@nj',
              'big_data', 'utf8mb4'))
# INSERT statement prefixes, one per target table; the VALUES part is not
# included here (presumably appended by BatchSql -- verify against its use).
sql = "insert into meituan_backstage VALUES "
sql2 = "insert into rating_score VALUES"
sql3 = "insert into daily_activity_data VALUES"


def crawler(client, account, password):
    """
    Crawl the data of one merchant back office.

    The part of the function visible here only announces the account and
    prepares one empty list per group of crawled data.

    :param client:  object that drives the browser
    :param account: merchant account name
    :param password: merchant password
    :return:
    """
    print("开始爬取商家:", account)
    # One independent accumulator per group of crawled data.
    (result, result1, result1_1, result2,
     coupon_name, coupon_content, rating_data) = ([] for _ in range(7))
# @Time: 2018/1/29 下午2:09

'''
处理外卖大师web端的店圈监控的数据,分别获取该店圈的1.5,3,5公里范围的所有店铺数据
'''

from util.DB.DAO import DBUtils,BatchSql
import datetime
import json

'''
数据库信息,数据库1:web端用来保存外卖大师的数据
          数据库2:查询城市数据
'''

# db1: 'wmds' database, the web-side store for the data saved by this module.
db1 = DBUtils(('116.62.70.68', 3306, 'bigdata', 'gisfood20171220@nj', 'wmds', 'utf8mb4'))
# db2: 'exdata_2018' database, used to query city data.
db2 = DBUtils(('192.168.1.200', 3306, 'njjs', 'njjs1234', 'exdata_2018', 'utf8mb4'))
# Timestamp string taken once, when the module is imported.
# NOTE(review): this name hides the standard `time` module for any code in
# this module that expects it -- confirm nothing below needs `time.sleep` etc.
time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')


'''
获取配置信息:店铺名称,店铺id,店铺经纬度,店圈范围半径类型
'''
# Hard-coded shop configuration: nine entries per list. The four lists are
# presumably index-aligned (name, id, latitude, longitude of the same shop) --
# verify against the code that consumes them.
area_shop_name = ['谢恒兴奇味鸡煲(义乌店)','谢恒兴奇味鸡煲(河西万达店)','谢恒兴奇味鸡煲(同曦鸣城店)','谢恒兴奇味鸡煲(殷巷店)','谢恒兴奇味鸡煲(明发广场店)','谢恒兴奇味鸡煲(小市店)','谢恒兴奇味鸡煲(油坊桥店)','谢恒兴奇味鸡煲(元通店)','奇味鸡煲(四方新村店)']
area_shop_id = [160279990,161289358,161378073,161313783,161341608,156753255,4166456,161507150,161506911]
shop_latitude = [31.937813,32.03445,31.9427300,31.9099300,31.9776340,32.0933240,31.9664130,31.992613,32.0185100]
shop_longitude = [118.8760630,118.74473,118.8231500,118.8357800,118.7976980,118.7895010,118.7213890,118.709933,118.8420300]
# Monitoring radius types; presumably metres, matching the 1.5 / 3 / 5 km
# ranges named in the module description. NOTE(review): this name shadows
# the builtin `type` inside this module.
type = [1500,3000,5000]


def deal_quan_position():
def run():
    """Parse one hard-coded ``rating.pickle`` dump with ParseRatingObject.

    Opens a ``DBUtils`` handle on the ``areadata`` database, builds the
    parser for the fixed city/date/area below and calls its ``parse()``.
    """
    connection = DBUtils(('192.168.0.200', 3306, 'njjs', 'njjs1234', 'areadata', 'utf8mb4'))

    dump_file = "C:\\Users\\Administrator\\Desktop\\test\\rating.pickle"
    job_args = (dump_file, '南京', '2017-11-10', '谢恒兴', connection)
    ParseRatingObject(*job_args).parse()