コード例 #1
0
def get_driver_num(**op_kwargs):
    """Count active drivers per driver attribute pair and store the totals.

    Pages through ``query_driver_city_serv`` (keyed on the last seen driver
    id) to map each driver id to a ``"<col1>,<col2>"`` key -- presumably
    city id and service type, judging by the query name; confirm against
    the SQL. Then unions the Redis sets named by ``active_a_driver`` and
    ``active_no_driver`` for the ten one-minute buckets starting at
    ``op_kwargs['ts']``, counts members per key and bulk-inserts the rows
    with ``insert_driver_num`` into the ``mysql_bi`` connection.

    Args:
        **op_kwargs: must contain ``ts``, a timestamp of the form
            ``YYYY-MM-DDTHH:MM:SS+ZZ:ZZ``.
    """
    driver_dic = {}
    conn = get_db_conn('mysql_oride_data_readonly')
    try:
        mcursor = conn.cursor()
        try:
            driver_id = -1
            while True:
                sql = query_driver_city_serv.format(id=driver_id)
                logging.info(sql)
                mcursor.execute(sql)
                conn.commit()
                page = mcursor.fetchall()
                if not page:
                    break
                # Fold each page straight into the lookup instead of
                # concatenating every page into one ever-growing tuple.
                for data in page:
                    driver_dic[data[0]] = ",".join([str(data[1]), str(data[2])])
                # The next page starts after the last id of this one.
                driver_id = page[-1][0]
        finally:
            mcursor.close()
    finally:
        conn.close()

    redis_conn = RedisHook(redis_conn_id='pika_85').get_conn()
    ts = op_kwargs['ts']
    day_part, clock_part = ts.split('T')
    # Drop the UTC offset suffix before parsing.
    time_array = time.strptime(day_part + ' ' + clock_part.split('+')[0],
                               "%Y-%m-%d %H:%M:%S")
    timestamp = int(time.mktime(time_array))
    dt_start = time.strftime('%Y%m%d%H%M', time.localtime(timestamp))

    a_member = set()
    no_member = set()
    for i in range(10):
        minute = time.strftime('%Y%m%d%H%M', time.localtime(timestamp + i * 60))
        a_member.update(redis_conn.smembers(active_a_driver % minute))
        no_member.update(redis_conn.smembers(active_no_driver % minute))

    driver_num = {}
    for field, members in (("a_mem", a_member), ("no_mem", no_member)):
        for mem in members:
            # Drivers missing from the lookup are grouped under '0,0'.
            key = driver_dic.get(int(mem), '0,0')
            counts = driver_num.setdefault(key, {"a_mem": 0, "no_mem": 0})
            counts[field] += 1

    res = []
    for key, counts in driver_num.items():
        info = key.split(",")
        res.append([int(info[0]), int(info[1]), dt_start + '00',
                    counts["a_mem"], counts["no_mem"]])

    conn = get_db_conn('mysql_bi')
    try:
        mcursor = conn.cursor()
        try:
            mcursor.executemany(insert_driver_num, res)
            logging.info('insert num %s, data %s', len(res), str(res))
            conn.commit()
        finally:
            mcursor.close()
    finally:
        conn.close()
コード例 #2
0
def not_pay_push(**op_kwargs):
    """Send a "not_pay" push to every user returned for unpaid orders.

    Repairs the two Hive tables, runs ``not_pay_hql`` to collect order ids,
    resolves them to user ids in MySQL in chunks of 100 via ``not_pay_sql``,
    and calls ``send_push`` once per distinct user id.

    Args:
        **op_kwargs: ``ds`` is the date passed to the HQL and to
            ``get_lagos_timestamp``; ``env`` (default ``'prod'``) switches
            to the ``_dev`` tables and ``sqoop_db_test`` when it is
            ``'test'``.
    """
    dt = op_kwargs.get('ds')
    env = op_kwargs.get('env', 'prod')
    lagos_9_clock_timestamp = get_lagos_timestamp(dt)

    table_name = 'data_order'
    table_name2 = 'data_user_whitelist'
    db_name = 'sqoop_db'
    if env == 'test':
        table_name += '_dev'
        table_name2 += '_dev'
        db_name += '_test'

    cursor = get_hive_cursor()
    try:
        cursor.execute("msck repair table oride_db.%s" % table_name)
        cursor.execute("msck repair table oride_db.%s" % table_name2)
        cursor.execute(
            not_pay_hql.format(table_name=table_name,
                               table_name2=table_name2,
                               dt=dt))
        res = [x[0] for x in cursor.fetchall()]
    finally:
        cursor.close()
    print("not pay order ids: %d" % len(res))

    step = 100
    uids = set()
    mysql_conn = get_db_conn(db_name)
    try:
        mysql_cursor = mysql_conn.cursor()
        try:
            # Query in chunks so the id list in the SQL stays bounded.
            for i in range(0, len(res), step):
                ids = ','.join(str(x) for x in res[i:i + step])
                mysql_cursor.execute(not_pay_sql.format(ids=ids))
                uids.update(rec[0] for rec in mysql_cursor.fetchall())
        finally:
            mysql_cursor.close()
    finally:
        mysql_conn.close()

    print("not pay user ids: %d" % len(uids))
    print(uids)
    for uid in uids:
        send_push(env, 1, uid, lagos_9_clock_timestamp, "not_pay")
コード例 #3
0
def get_driver_online_time(ds, **op_kwargs):
    """Collect driver time ranges in parallel and overwrite a Hive partition.

    Runs the ``get_driver_id`` query to obtain the maximum driver id, starts
    one ``get_driver_timerange`` process per slice of 1000 consecutive ids
    (1..max), waits for all of them, and, when any rows were produced,
    writes them to ``oride_dw_ods.<table_name>`` partition ``dt=<ds>`` with
    ``INSERT OVERWRITE``.

    Args:
        ds: partition value used in the Hive statement.
        **op_kwargs: must contain ``ds_nodash``, which is passed to each
            worker.
    """
    dt = op_kwargs["ds_nodash"]
    conn = get_db_conn('timerange_conn_db')
    try:
        mcursor = conn.cursor()
        try:
            mcursor.execute(get_driver_id)
            result = mcursor.fetchone()
            conn.commit()
        finally:
            mcursor.close()
    finally:
        conn.close()

    # The query yields no row (or NULL) when there are no drivers yet.
    max_driver_id = result[0] if result else None
    if not max_driver_id:
        logging.warning('no max driver id returned, nothing to import')
        return

    logging.info('max driver id %d', max_driver_id)
    part_size = 1000
    manager = Manager()
    rows = manager.list([])
    processes = []
    for start in range(0, max_driver_id, part_size):
        # Ids are 1-based: this slice covers start+1 .. start+part_size,
        # capped at max_driver_id.
        id_slice = list(range(start + 1,
                              min(start + part_size, max_driver_id) + 1))
        p = Process(target=get_driver_timerange, args=(id_slice, dt, rows))
        processes.append(p)
        p.start()
    for p in processes:
        p.join()

    if rows:
        query = """
            INSERT OVERWRITE TABLE oride_dw_ods.{tab_name} PARTITION (dt='{dt}')
            VALUES {value}
        """.format(dt=ds, value=','.join(rows),tab_name=table_name)
        logging.info('import_driver_online_time run sql:%s' % query)
        hive_hook = HiveCliHook()
        hive_hook.run_cli(query)
コード例 #4
0
ファイル: query_data.py プロジェクト: lishuailishuai/shanchu2
def write_email(**op_kwargs):
    """Build the daily HTML report and mail it with two CSV attachments.

    Queries ``QUERY_EMAIL_DATA`` for the range ``n_days_ago(ds,
    QUERY_DATA_RANGE)`` .. ``ds``, renders one HTML row per entry of
    ``col_meaning`` (skipping indexes in ``not_show_indexs``), attaches
    ``/tmp/<ds>_driver_data.csv`` and ``/tmp/<ds>_online_driver_num.csv``
    and sends the message through ``mail.opay-inc.com``. Returns without
    sending when the query yields no rows. SMTP errors are printed, not
    raised.

    Args:
        **op_kwargs: ``ds`` is the report date.
    """
    dt = op_kwargs.get('ds')
    init_day = n_days_ago(dt, QUERY_DATA_RANGE)
    sql_conn = get_db_conn()
    try:
        sql_cursor = sql_conn.cursor()
        try:
            sql_cursor.execute(QUERY_EMAIL_DATA % (init_day, dt))
            rows = sql_cursor.fetchall()
        finally:
            sql_cursor.close()
    finally:
        sql_conn.close()
    if not rows:
        return

    arr = []
    for row in rows:
        elem = list(row)
        # Column 1 is a date object; render it once as text.
        elem[1] = elem[1].strftime('%Y-%m-%d')
        arr.append(elem)
    arr.sort(key=lambda x: x[1], reverse=True)

    h = mail_msg_header.format(dt1=arr[0][1], dt2=arr[-1][1])
    for x in range(len(col_meaning)):
        if x in not_show_indexs:
            continue
        meaning = col_meaning[x]
        is_percentage = "ratio" in meaning or "rate" in meaning or "/" in meaning
        h += part_html1.format(key=meaning)
        # The first column uses a different cell template than the rest.
        cell_template = part_html2 if x > 0 else part_html2_1
        for row in arr:
            # Data columns are shifted by one relative to col_meaning.
            tmp_val = row[x + 1]
            if tmp_val is None:
                tmp_val = "-"
            elif is_percentage:
                tmp_val = "%.2f%%" % (tmp_val * 100)
            h += cell_template.format(val=tmp_val)
        h += part_html3
    h += mail_msg_tail
    h += css_style

    message = MIMEMultipart()
    subject = 'Oride {dt1} -- {dt2} Daily Report'.format(dt1=arr[0][1],
                                                         dt2=arr[-1][1])
    message['Subject'] = Header(subject, 'utf-8')
    message.attach(MIMEText(h, 'html', 'utf-8'))

    attachments = (
        ("/tmp/%s_driver_data.csv" % dt, "driver_stat_%s.csv" % dt),
        ("/tmp/%s_online_driver_num.csv" % dt, "%s_online_driver_num.csv" % dt),
    )
    for path, filename in attachments:
        # Close the file deterministically instead of relying on GC.
        with open(path, 'r') as csv_file:
            att = MIMEText(csv_file.read(), 'plain', 'utf-8')
        att["Content-Type"] = 'application/octet-stream'
        att["Content-Disposition"] = 'attachment; filename="%s"' % filename
        message.attach(att)

    try:
        server = smtplib.SMTP('mail.opay-inc.com', 25)
        try:
            server.ehlo()
            server.starttls()
            server.login(sender, password)
            server.sendmail(sender, receivers, message.as_string())
            print("邮件发送成功")
        finally:
            server.close()
    except smtplib.SMTPException as e:
        # Exceptions have no ``.message`` attribute on Python 3.
        print(e)
コード例 #5
0
def hiveresult_to_channel_mysql(ds, **kwargs):
    """Copy a four-column Hive result into ``opay_spread_mysql`` in batches.

    Runs ``kwargs['sql']`` (formatted with ``ds``) on Hive and writes the
    rows to MySQL as multi-row statements of at most 1000 rows each, built
    as ``sql_insert + values + sql_ext``. All batches are committed
    together at the end.

    Args:
        ds: date substituted into the Hive query.
        **kwargs: ``sql`` (Hive query template), ``sql_insert`` (statement
            prefix placed before the value tuples) and ``sql_ext``
            (statement suffix placed after them).
    """
    hql = kwargs['sql'].format(ds=ds)
    sql_insert = kwargs['sql_insert']
    sql_ext = kwargs['sql_ext']

    cursor = get_hive_cursor()
    try:
        logging.info(hql)
        cursor.execute(hql)
        results = cursor.fetchall()
    finally:
        cursor.close()

    mysql_conn = get_db_conn('opay_spread_mysql')
    try:
        mcursor = mysql_conn.cursor()
        try:
            batch = []
            for day, channel, driver_type, drivers in results:
                batch.append(
                    "('{day}', '{channel}', '{driver_type}', '{dirvers}')".format(
                        day=day, channel=channel, driver_type=driver_type,
                        dirvers=drivers))
                if len(batch) >= 1000:
                    mcursor.execute(
                        sql_insert + ' ' + ','.join(batch) + ' ' + sql_ext)
                    batch = []
            # Flush the final partial batch.
            if batch:
                mcursor.execute(
                    sql_insert + ' ' + ','.join(batch) + ' ' + sql_ext)
            mysql_conn.commit()
        finally:
            mcursor.close()
    finally:
        mysql_conn.close()
コード例 #6
0
    def __get_mysql_table_schema(self, mysql_db, mysql_table, mysql_conn):
        """Return the column descriptions of a MySQL table.

        Reuses the cursor cached in ``self.mysql_cursor`` for ``mysql_conn``
        (creating and caching one when absent) and reads
        ``information_schema.COLUMNS`` in ordinal order.

        Returns:
            A list of dicts with keys ``column`` (name), ``column_info``
            (a column definition using the type mapped through
            ``self.mysql_type_to_hive``, defaulting to ``string``) and
            ``column_type`` (upper-cased MySQL data type).
        """
        cursor = self.mysql_cursor.get(mysql_conn, None)
        if not cursor:
            cursor = get_db_conn(mysql_conn).cursor()
            self.mysql_cursor[mysql_conn] = cursor

        sql = '''
            SELECT 
                COLUMN_NAME, 
                DATA_TYPE, 
                COLUMN_COMMENT,
                COLUMN_TYPE 
            FROM information_schema.COLUMNS 
            WHERE TABLE_SCHEMA='{db}' AND 
                TABLE_NAME='{table}' 
            ORDER BY ORDINAL_POSITION
        '''.format(
            db=mysql_db,
            table=mysql_table
        )
        logging.info(sql)
        cursor.execute(sql)
        rows = cursor.fetchall()

        mysql_schema = []
        for (name, kind, comment, _full_type) in rows:
            upper_kind = kind.upper()
            hive_type = self.mysql_type_to_hive.get(upper_kind, 'string')
            entry = {
                'column': name,
                'column_info': "`%s` %s comment '%s'" % (name, hive_type, comment),
                'column_type': upper_kind.strip()
            }
            mysql_schema.append(entry)

        logging.info(mysql_schema)
        return mysql_schema
コード例 #7
0
def get_data_from_hive(ds, execution_date, **op_kwargs):
    """Load hourly CICO summary rows for NG from Hive and hand them to MySQL.

    Selects rows of ``opay_dw.app_opay_cico_sum_ng_h`` whose ``dt hour`` lies
    between the previous and the current local hour (as computed by
    ``default.localTime`` from ``execution_date``), then calls
    ``__data_only_mysql`` and ``__data_to_mysql`` with a cursor on
    ``mysql_bi``. NOTE(review): committing is assumed to happen inside
    those helpers -- confirm; this function only closes the resources.

    Args:
        ds: passed to the template as ``pt`` (not referenced by the
            current query text).
        execution_date: datetime used to derive the hour window.
        **op_kwargs: unused.
    """
    # ds = op_kwargs.get('ds', time.strftime('%Y-%m-%d', time.localtime(time.time() - 86400)))
    hql = '''
        SELECT 
            create_date_hour , 
            sub_service_type , 
            state , 
            region , 
            order_status , 
            order_cnt , 
            order_amt,
            country_code , 
            dt , 
            hour
        from opay_dw.app_opay_cico_sum_ng_h
        where 
        country_code = 'NG'
    and concat(dt,' ',hour) >= date_format(default.localTime("{config}", 'NG', '{v_date}', -1), 'yyyy-MM-dd HH')
    and concat(dt,' ',hour) <= date_format(default.localTime("{config}", 'NG', '{v_date}', 0), 'yyyy-MM-dd HH')

    '''.format(
        pt=ds,
        v_date=execution_date.strftime("%Y-%m-%d %H:%M:%S"),
        config=config
    )

    logging.info(hql)
    hive_cursor = get_hive_cursor()
    try:
        hive_cursor.execute(hql)
        hive_data = hive_cursor.fetchall()

        mysql_conn = get_db_conn('mysql_bi')
        try:
            mcursor = mysql_conn.cursor()
            try:
                __data_only_mysql(
                    mcursor,
                    execution_date
                )

                __data_to_mysql(
                    mcursor,
                    hive_data,
                    [
                        'create_date_hour',
                        'sub_service_type',
                        'state',
                        'region',
                        'order_status',
                        'order_cnt',
                        'order_amt',
                        'country_code',
                        'dt',
                        'hour'
                    ]
                )
            finally:
                mcursor.close()
        finally:
            # The connection used to be left open for the GC to reclaim.
            mysql_conn.close()
    finally:
        hive_cursor.close()
コード例 #8
0
def csresult_channel_to_mysql(ds, **kwargs):
    """Copy the ``cssql`` Hive result into ``promoter_order_day`` in batches.

    Runs ``cssql`` (formatted with ``ds``) on Hive and inserts the rows into
    ``promoter_order_day`` on ``opay_spread_mysql`` as multi-row INSERTs of
    at most 1000 rows each, committing once at the end. Backslashes are
    stripped from the driver name and single quotes are escaped before the
    value is embedded in the SQL text.

    NOTE(review): the original carried an unused, incomplete
    ``ON DUPLICATE KEY UPDATE`` fragment; the statement is a plain INSERT.

    Args:
        ds: date substituted into the Hive query.
        **kwargs: unused.
    """
    hql = cssql.format(ds=ds)
    cursor = get_hive_cursor()
    try:
        logging.info(hql)
        cursor.execute(hql)
        results = cursor.fetchall()
    finally:
        cursor.close()

    sql_insert = '''
        INSERT INTO promoter_order_day (
            dt, driver_id, driver_type, name, mobile, city_id, distance, income, online_paid, online_total, total_orders,
            arrived_orders, total_comments, bad_comments, total_score, online_time
        ) VALUES
    '''

    mysql_conn = get_db_conn('opay_spread_mysql')
    try:
        mcursor = mysql_conn.cursor()
        try:
            batch = []
            # ``driver_type`` replaces the original loop variable ``type``,
            # which shadowed the builtin.
            for (driver_id, dt, name, phone, city, driver_type, distance,
                 income, onlineSettlement, onlineTotal, total_orders,
                 arrived_orders, comment, badcomments_num, score,
                 onlinetime) in results:
                batch.append('''
            ('{dt}', '{driver_id}', '{driver_type}', '{name}', '{mobile}', '{city_id}', '{distance}', '{income}', '{online_paid}', '{online_total}', '{total_orders}', '{arrived_orders}', '{total_comments}', '{bad_comments}', '{total_score}', '{online_time}')
        '''.format(dt=dt,
                   driver_id=driver_id,
                   driver_type=driver_type,
                   name=name.replace("\\", "").replace("'", "\\'"),
                   mobile=phone,
                   city_id=city,
                   distance=distance,
                   income=income,
                   online_paid=onlineSettlement,
                   online_total=onlineTotal,
                   total_orders=total_orders,
                   arrived_orders=arrived_orders,
                   total_comments=comment,
                   bad_comments=badcomments_num,
                   total_score=score,
                   online_time=onlinetime))
                if len(batch) >= 1000:
                    mcursor.execute(sql_insert + ' ' + ','.join(batch))
                    batch = []

            # Flush the final partial batch.
            if batch:
                mcursor.execute(sql_insert + ' ' + ','.join(batch))
            mysql_conn.commit()
        finally:
            mcursor.close()
    finally:
        mysql_conn.close()
コード例 #9
0
def __getOpaySpreadDrivers():
    """Return a DataFrame of driver -> team/group/city assignments.

    Queries ``rider_signups`` joined to the team/group tables on
    ``opay_spread_mysql`` and maps each city id to its name through
    ``__getcityList()`` (falling back to ``'other'``).

    Returns:
        A ``pandas.DataFrame`` with columns ``team_id``, ``city``,
        ``team_name``, ``group_name`` and ``driver_id``, or ``None`` when
        any step fails (the error is logged).
    """
    try:
        citys = __getcityList()
        logging.info(citys)
        msql = '''
            select 
                min(if(isnull(gt.team_id), 0, gt.team_id)), 
                min(if(isnull(gt.city), 0, gt.city)), 
                min(if(isnull(gt.team_name), 'other', gt.team_name)), 
                min(if(isnull(gt.group_name), 'other', gt.group_name)), 
                r.driver_id 
            from 
                rider_signups r left join 
                (select 
                    t.id as team_id,
                    t.city,
                    t.name as team_name,
                    g.name as group_name 
                from driver_group g left join driver_team t 
                on g.id = t.group_id 
                where g.del = 0 
                ) gt 
            on gt.team_id = r.team_id 
            where r.driver_id > 0 
            group by r.driver_id 
        '''
        #-- where g.del = 0 and t.del = 0
        logging.info(msql)
        mysql_conn = get_db_conn('opay_spread_mysql')
        try:
            spread_db = mysql_conn.cursor()
            try:
                spread_db.execute(msql)
                results = spread_db.fetchall()
            finally:
                spread_db.close()
        finally:
            mysql_conn.close()

        group_info = {
            'team_id': [],
            'city': [],
            'team_name': [],
            'group_name': [],
            'driver_id': []
        }
        for (team_id, city, team_name, group_name, driver_id) in results:
            group_info['team_id'].append(team_id)
            group_info['city'].append(citys.get(int(city), 'other'))
            group_info['team_name'].append(team_name)
            group_info['group_name'].append(group_name)
            group_info['driver_id'].append(driver_id)

        return pandas.DataFrame(group_info)
    except Exception:
        # Exception (not BaseException) so Ctrl-C / SystemExit still
        # propagate; callers keep receiving None on ordinary failures.
        logging.exception('failed to load opay spread drivers')
        return None
コード例 #10
0
def __getOrideOrders(st, ed):
    """Return per-driver order counters for the window ``[st, ed)``.

    Runs an aggregate over ``data_order`` (``driver_serv_type = 2``) on
    ``sqoop_db`` with the session time zone set to ``+1:00``.

    Args:
        st: inclusive lower bound compared against ``take_time``,
            ``finish_time`` and ``arrive_time`` -- presumably a unix
            timestamp; confirm with the caller.
        ed: exclusive upper bound, same unit as ``st``.

    Returns:
        A ``pandas.DataFrame`` with columns ``driver_id``, ``ordertakes``,
        ``orderfinishs``, ``orderarrives``, ``drivertakes``,
        ``driverfinishs``, ``driverarrives`` and ``driver5arrives``, or
        ``None`` when any step fails (the error is logged).
    """
    try:
        msql = '''
            select 
                driver_id,  
                count(distinct if(take_time>={st} and take_time<{ed}, id, null)) as ordertakes,
                count(distinct if(finish_time>={st} and finish_time<{ed}, id, null)) as orderfinishs,
                count(distinct if(arrive_time>={st} and arrive_time<{ed}, id, null)) as orderarrives,
                if(count(distinct if(take_time>={st} and take_time<{ed}, id, null))>0, 1, 0) as drivertakes,
                if(count(distinct if(finish_time>={st} and finish_time<{ed}, id, null))>0, 1, 0) as driverfinishs,
                if(count(distinct if(arrive_time>={st} and arrive_time<{ed}, id, null))>0, 1, 0) as driverarrives,
                if(count(distinct if(arrive_time>={st} and arrive_time<{ed}, id, null))>=5, 1, 0) as driver5arrives 
            from data_order 
            where ((arrive_time >= {st} and arrive_time < {ed}) or 
                (take_time >= {st} and take_time < {ed})) and 
                driver_serv_type = 2  
            group by driver_id 
        '''.format(st=st, ed=ed)
        mysql_conn = get_db_conn('sqoop_db')
        try:
            oride_db = mysql_conn.cursor()
            try:
                logging.info(msql)
                oride_db.execute("set time_zone = '+1:00'")
                oride_db.execute(msql)
                results = oride_db.fetchall()
            finally:
                oride_db.close()
        finally:
            mysql_conn.close()

        columns = ('ordertakes', 'orderfinishs', 'orderarrives',
                   'drivertakes', 'driverfinishs', 'driverarrives',
                   'driver5arrives')
        driver_info = {'driver_id': []}
        for column in columns:
            driver_info[column] = []
        for row in results:
            driver_info['driver_id'].append(row[0])
            # The aggregates are coerced to int before storing.
            for column, value in zip(columns, row[1:]):
                driver_info[column].append(int(value))

        return pandas.DataFrame(driver_info)
    except Exception:
        # Exception (not BaseException) so Ctrl-C / SystemExit still
        # propagate; callers keep receiving None on ordinary failures.
        logging.exception('failed to load oride orders')
        return None
コード例 #11
0
def first_user_data(**op_kwargs):
    """Upsert per-referral-code first-order user counts into MySQL.

    Runs a Hive aggregation for ``op_kwargs['ds']`` and writes the rows to
    ``promoter_data_day`` (columns ``code``, ``day``, ``pft``,
    ``create_time``) on ``opay_spread_mysql`` in batches of at most 1000
    rows using ``ON DUPLICATE KEY UPDATE``.

    Args:
        **op_kwargs: ``ds`` is the date substituted into the Hive query.
    """
    dt = op_kwargs.get('ds')
    cursor = get_hive_cursor()
    try:
        cursor.execute("SET mapreduce.job.queuename=root.airflow")
        cursor.execute("SET hive.exec.parallel=true")
        hql = """
        SELECT 
            uc.code,
            from_unixtime(unix_timestamp(uo.dt,'yyyy-MM-dd'), 'yyyyMMdd') AS day,
            COUNT(DISTINCT uo.user_id) AS u, 
            unix_timestamp() 
        FROM (SELECT 
                user_id,
                get_json_object(event_value, '$.bind_refferal_code') AS code 
            FROM oride_dw.dwd_oride_driver_cheating_detection_hi 
            ) AS uc 
        JOIN (SELECT 
                dt,
                passenger_id as user_id,
                arrive_time,
                row_number() over(partition by passenger_id order by arrive_time) orders
            FROM oride_dw.dwd_oride_order_base_include_test_di 
            WHERE status IN (4,5) AND 
                dt = '{ds}' 
            ) AS uo 
        ON uc.user_id = uo.user_id 
        WHERE uo.orders = 1 and 
            from_unixtime(uo.arrive_time,'yyyy-MM-dd') = '{ds}' 
        GROUP BY uc.code, uo.dt
    """.format(ds=dt)
        logging.info(hql)
        cursor.execute(hql)
        res = cursor.fetchall()
    finally:
        cursor.close()

    sql = 'insert into promoter_data_day (code, day, pft, create_time) values '
    ext = ' on duplicate key update pft=values(pft), create_time=values(create_time)'
    mconn = get_db_conn('opay_spread_mysql')
    try:
        mysql = mconn.cursor()
        try:
            vals = []
            for (c, d, p, t) in res:
                vals.append("('{c}', '{d}', '{p}', '{t}')".format(c=c, d=d, p=p, t=t))
                if len(vals) >= 1000:
                    mysql.execute(sql + ",".join(vals) + ext)
                    vals = []

            # Flush the final partial batch.
            if vals:
                mysql.execute(sql + ",".join(vals) + ext)
            # Without an explicit commit the writes are lost unless the
            # connection happens to be in autocommit mode.
            mconn.commit()
        finally:
            mysql.close()
    finally:
        mconn.close()
コード例 #12
0
def create_hive_external_table(db, table, conn, **op_kwargs):
    """Create the Hive ODS table that mirrors a MySQL table.

    First asks ``SqoopSchemaUpdate.update_hive_schema`` to sync the schema,
    then reads the MySQL column list from ``information_schema.COLUMNS``,
    maps each type through ``mysql_type_to_hive`` (DECIMAL columns keep
    their full column type minus the ``unsigned``/``signed`` keyword;
    unknown types become ``string``) and runs ``ods_create_table_hql``
    through ``HiveCliHook``.

    Args:
        db: MySQL schema name.
        table: MySQL table name; also substituted into ``hive_table`` and
            ``hdfs_path``.
        conn: connection id passed to ``get_db_conn``.
        **op_kwargs: unused.
    """
    sqoopSchema = SqoopSchemaUpdate()
    # The return value was never consulted; the call is kept for its effect.
    sqoopSchema.update_hive_schema(
        hive_db=hive_db,
        hive_table=hive_table.format(bs=table),
        mysql_db=db,
        mysql_table=table,
        mysql_conn=conn
    )

    sql = '''
        select 
            COLUMN_NAME, 
            DATA_TYPE, 
            COLUMN_COMMENT,
            COLUMN_TYPE 
        from information_schema.COLUMNS 
        where TABLE_SCHEMA='{db}' and 
            TABLE_NAME='{table}' 
        order by ORDINAL_POSITION
    '''.format(db=db, table=table)
    mysql_conn = get_db_conn(conn)
    try:
        mcursor = mysql_conn.cursor()
        try:
            mcursor.execute(sql)
            res = mcursor.fetchall()
        finally:
            mcursor.close()
    finally:
        mysql_conn.close()

    columns = []
    for (name, data_type, comment, co_type) in res:
        if data_type.upper() == 'DECIMAL':
            # Keep precision/scale but strip the sign keyword.
            col_type = co_type.replace('unsigned', '').replace('signed', '')
        else:
            col_type = mysql_type_to_hive.get(data_type.upper(), 'string')
        columns.append("`%s` %s comment '%s'" % (name, col_type, comment))

    # SQL that creates the Hive table.
    hql = ods_create_table_hql.format(
        db_name=hive_db,
        table_name=hive_table.format(bs=table),
        columns=",\n".join(columns),
        hdfs_path=hdfs_path.format(bs=table)
    )
    hive_hook = HiveCliHook()
    logging.info('Executing: %s', hql)
    hive_hook.run_cli(hql)
コード例 #13
0
def base_data(**op_kwargs):
    """Upsert per-referral-code user and device counts into MySQL.

    Runs a Hive aggregation for ``op_kwargs['ds']`` and writes the rows to
    ``promoter_data_day`` (columns ``code``, ``day``, ``users_count``,
    ``device_count``, ``create_time``) on ``opay_spread_mysql`` in batches
    of at most 1000 rows using ``ON DUPLICATE KEY UPDATE``.

    Args:
        **op_kwargs: ``ds`` is the date substituted into the Hive query.
    """
    dt = op_kwargs.get('ds')
    cursor = get_hive_cursor()
    try:
        cursor.execute("SET mapreduce.job.queuename=root.airflow")
        cursor.execute("SET hive.exec.parallel=true")
        hql = """
        SELECT
            t.code,
            from_unixtime(unix_timestamp(dt,'yyyy-MM-dd'), 'yyyyMMdd') as day,
            COUNT(DISTINCT t.bind_number) as users_count,
            COUNT(DISTINCT if (length(t.bind_device)>0, t.bind_device, NULL)) as device_count, 
            unix_timestamp() 
        FROM oride_dw.dwd_oride_driver_cheating_detection_hi 
        LATERAL VIEW json_tuple(event_value, 'bind_refferal_code', 'bind_number', 'bind_device_id') t AS code, bind_number, bind_device 
        WHERE dt = '{ds}'
        GROUP BY t.code, dt
    """.format(ds=dt)
        logging.info(hql)
        cursor.execute(hql)
        res = cursor.fetchall()
    finally:
        cursor.close()

    sql = 'insert into promoter_data_day (code, day, users_count, device_count, create_time) values '
    ext = """ on duplicate key update 
        users_count=values(users_count), 
        device_count=values(device_count), 
        create_time=values(create_time)
    """
    mconn = get_db_conn('opay_spread_mysql')
    try:
        mysql = mconn.cursor()
        try:
            vals = []
            for (code, day, users, device, t) in res:
                vals.append("('{code}', '{day}', '{user}', '{d}', '{t}')".format(
                    code=code, day=day, user=users, d=device, t=t))
                if len(vals) >= 1000:
                    mysql.execute(sql + ",".join(vals) + ext)
                    vals = []

            # Flush the final partial batch.
            if vals:
                mysql.execute(sql + ",".join(vals) + ext)
            # Without an explicit commit the writes are lost unless the
            # connection happens to be in autocommit mode.
            mconn.commit()
        finally:
            mysql.close()
    finally:
        mconn.close()
コード例 #14
0
def __getcityList():
    """Return a ``{city_id: city_name}`` dict read from ``data_city_conf``.

    Reads from the ``sqoop_db`` connection. Returns an empty dict when the
    lookup fails (the error is logged).
    """
    try:
        mysql_conn = get_db_conn('sqoop_db')
        try:
            oride_db = mysql_conn.cursor()
            try:
                msql = '''
            select id, name from data_city_conf
        '''
                oride_db.execute(msql)
                results = oride_db.fetchall()
            finally:
                oride_db.close()
        finally:
            mysql_conn.close()
        return {city_id: city_name for (city_id, city_name) in results}
    except Exception:
        # Exception (not BaseException) so Ctrl-C / SystemExit still
        # propagate; callers keep receiving {} on ordinary failures.
        logging.exception('failed to load city list')
        return {}
コード例 #15
0
def order_result_to_mysql(ds, **kwargs):
    """Upsert the promoter order overview from Hive into MySQL.

    Runs ``promoter_orderoverview_hql`` (formatted with ``ds``) and writes
    the rows to ``promoter_driver_day`` on ``opay_spread_mysql`` in batches
    of at most 1000 rows, updating ``firstbill`` on duplicate keys. The
    ``firstbill`` value is taken from the row only when ``driver_type`` is
    2, otherwise 0; mobiles of 20 or more characters are stored as ``''``.

    Args:
        ds: date substituted into the Hive query.
        **kwargs: unused.
    """
    hql = promoter_orderoverview_hql.format(ds=ds)
    cursor = get_hive_cursor()
    try:
        logging.info(hql)
        cursor.execute(hql)
        results = cursor.fetchall()
    finally:
        cursor.close()

    sql_insert = 'INSERT INTO promoter_driver_day (day, name, mobile, code, channel, driver_type, firstbill) VALUES'
    sql_ext = 'ON DUPLICATE KEY UPDATE firstbill = values(firstbill)'

    mysql_conn = get_db_conn('opay_spread_mysql')
    try:
        mcursor = mysql_conn.cursor()
        try:
            batch = []
            # The last column of each row is not used by this insert.
            for day, driver_type, channel, name, mobile, code, first, _ten in results:
                batch.append(
                    "('{day}', '{name}', '{mobile}',  '{code}', '{channel}', '{driver_type}', '{firstbill}')".format(
                        day=day,
                        name=name.replace("\\", "").replace("'", "\\'"),
                        mobile=mobile if (len(mobile) < 20) else '',
                        code=code,
                        channel=channel,
                        driver_type=driver_type,
                        firstbill=(first if driver_type == 2 else 0)))
                if len(batch) >= 1000:
                    mcursor.execute(
                        sql_insert + ' ' + ','.join(batch) + ' ' + sql_ext)
                    batch = []

            # Flush the final partial batch.
            if batch:
                mcursor.execute(
                    sql_insert + ' ' + ','.join(batch) + ' ' + sql_ext)
            mysql_conn.commit()
        finally:
            mcursor.close()
    finally:
        mysql_conn.close()
コード例 #16
0
def dirver_daily_summary_process(rows, index):
    """Insert every row into ``data_driver_report`` in one transaction.

    Args:
        rows: iterable of row sequences; each row is inserted positionally
            with one ``%s`` placeholder per cell.
        index: number reported in the log line together with the pid.
    """
    logging.info('insert rows num %d, Pid[%d]', index, os.getpid())
    connection = get_db_conn()
    connection.autocommit(False)
    connection.commit()
    target = 'data_driver_report'
    cursor = connection.cursor()
    for record in rows:
        params = tuple(record)
        marks = ",".join(["%s"] * len(params))
        statement = "INSERT INTO " + "{0} VALUES ({1})".format(target, marks)
        cursor.execute(statement, params)

    connection.commit()
    connection.close()
コード例 #17
0
def check_ds_data(**op_kwargs):
    """Alert when ``bi.ofood_merchant_offline_tag`` has no rows for a day.

    Counts rows whose ``update_time`` falls on ``ds`` and posts a message
    through ``comwx.postAppMessage`` when the query returns nothing or the
    count is not positive.

    Args:
        **op_kwargs: ``ds`` is the day to check; defaults to yesterday
            (local time).
    """
    ds = op_kwargs.get('ds', time.strftime('%Y-%m-%d', time.localtime(time.time() - 86400)))
    sql = '''
        select count(1) as cnt from bi.ofood_merchant_offline_tag where from_unixtime(update_time,'%Y-%m-%d')='{pt}'
    '''.format(pt=ds)
    mysql_conn = get_db_conn('mysql_bi')
    try:
        mcursor = mysql_conn.cursor()
        try:
            mcursor.execute(sql)
            res = mcursor.fetchall()
        finally:
            mcursor.close()
    finally:
        mysql_conn.close()

    logging.info(sql)
    logging.info(res)
    logging.info(isinstance(res, tuple))

    missing = True
    # Check for an empty result before indexing it; the original logged
    # res[0] first, so an empty result raised IndexError instead of alerting.
    if res:
        logging.info(len(res))
        logging.info(res[0])
        (cnt,) = res[0]
        logging.info(cnt)
        missing = cnt <= 0

    if missing:
        comwx.postAppMessage('ofood商家订单指标缺少{}数据, 请及时排查'.format(ds), '271')
コード例 #18
0
def hiveresult_to_mysql(ds, **kwargs):
    """Copy a seven-column Hive result into ``opay_spread_mysql`` in batches.

    Runs ``kwargs['sql']`` (formatted with ``ds``) on Hive and writes the
    rows to MySQL as multi-row statements of at most 1000 rows each, built
    as ``sql_insert + values + sql_ext``. Backslashes are stripped from the
    name and single quotes escaped; mobiles of 20 or more characters are
    stored as ``''``. All batches are committed together at the end.

    Args:
        ds: date substituted into the Hive query.
        **kwargs: ``sql`` (Hive query template), ``sql_insert`` (statement
            prefix placed before the value tuples) and ``sql_ext``
            (statement suffix placed after them).
    """
    hql = kwargs['sql'].format(ds=ds)
    sql_insert = kwargs['sql_insert']
    sql_ext = kwargs['sql_ext']

    cursor = get_hive_cursor()
    try:
        logging.info(hql)
        cursor.execute(hql)
        results = cursor.fetchall()
    finally:
        cursor.close()

    mysql_conn = get_db_conn('opay_spread_mysql')
    try:
        mcursor = mysql_conn.cursor()
        try:
            batch = []
            for day, driver_type, channel, name, mobile, code, drivers in results:
                batch.append(
                    "('{day}', '{name}', '{mobile}',  '{code}', '{channel}', '{driver_type}', '{dirvers}')".format(
                        day=day,
                        name=name.replace("\\", "").replace("'", "\\'"),
                        code=code,
                        mobile=mobile if (len(mobile) < 20) else '',
                        channel=channel,
                        driver_type=driver_type,
                        dirvers=drivers))
                if len(batch) >= 1000:
                    mcursor.execute(
                        sql_insert + ' ' + ','.join(batch) + ' ' + sql_ext)
                    batch = []

            # Flush the final partial batch.
            if batch:
                mcursor.execute(
                    sql_insert + ' ' + ','.join(batch) + ' ' + sql_ext)
            mysql_conn.commit()
        finally:
            mcursor.close()
    finally:
        mysql_conn.close()
コード例 #19
0
            `not_sys_cancel_orders_dserv` int unsigned not null default 0 comment '司机业务应答后取消status = 6 and driver_id > 0 and cancel_role <> 3 and cancel_role <> 4',
            `picked_orders` int unsigned not null default 0 comment '订单业务成功接驾',
            `picked_orders_dserv` int unsigned not null default 0 comment '司机业务成接驾',
            `orders_accept` int unsigned not null default 0 comment '订单业务接单数',
            `orders_accept_dserv` int unsigned not null default 0 comment '司机业务接单数',
            `agg_orders_finish` int unsigned not null default 0 comment '订单业务累计完单数',
            `agg_orders_finish_dserv` int unsigned not null default 0 comment '司机业务累计完单数',
            primary key (`city_id`,`serv_type`,`order_time`)
        ) engine=innodb DEFAULT CHARSET=utf8;
    """,
    database='bi',
    mysql_conn_id='mysql_bi',
    dag=dag
)

# Module-level MySQL handles, opened when this module is imported.
bidb_conn = get_db_conn('mysql_bi')
bidb = bidb_conn.cursor()
oridedb_conn = get_db_conn('sqoop_db')

# Comma-separated driver type codes.
# NOTE(review): the meaning of each code is not visible in this excerpt.
driver_type = '-1,0,1,2,99'

# The bare string below is the original author's note for the following
# function; translated: "Pre-insert the statistics time points.
# @param op_kwargs".
"""
预插入统计时间节点
@:param op_kwargs 
"""
def preInsertRowPoint(**op_kwargs):
    """Compute the day window for the pre-insert step.

    In test mode the window starts at local midnight of ``ds`` (default:
    today) and ends 86400 seconds later.

    NOTE(review): this excerpt appears to end mid-function; the
    non-test-mode branch and any use of the window are not visible here.
    """
    test_mode = op_kwargs.get('test_mode', False)
    if test_mode:
        ds = op_kwargs.get('ds', time.strftime('%Y-%m-%d', time.localtime(time.time())))
        # Unix timestamp of local midnight for ds.
        prev_day_start = int(time.mktime(datetime.strptime(ds, '%Y-%m-%d').timetuple()))
        prev_day_end = prev_day_start + 86400
コード例 #20
0
def get_data_from_hive(ds, execution_date, **op_kwargs):
    """Load the current hour's BD agent report for NG from Hive into MySQL.

    Selects rows of ``opay_dw.app_opay_bd_agent_report_ng_h`` whose
    ``dt hour`` equals the current local hour (as computed by
    ``default.localTime`` from ``execution_date``) and passes them to
    ``__data_to_mysql`` with a cursor on ``app_ali_bi_mysql``.
    NOTE(review): committing is assumed to happen inside that helper --
    confirm; this function only closes the resources.

    Args:
        ds: passed to the template as ``pt`` (not referenced by the
            current query text).
        execution_date: datetime used to derive the hour.
        **op_kwargs: unused.
    """
    # ds = op_kwargs.get('ds', time.strftime('%Y-%m-%d', time.localtime(time.time() - 86400)))
    hql = '''
        SELECT 
            create_date_hour,
            bd_admin_user_id,
            bd_admin_user_name,
            bd_admin_user_mobile,
            bd_admin_dept_id,
            bd_admin_job_id,
            bd_admin_leader_id,
            audited_agent_cnt,
            rejected_agent_cnt,
            ci_suc_order_cnt,
            ci_suc_order_amt,
            co_suc_order_cnt,
            co_suc_order_amt,
            pos_suc_amt,
            pos_suc_cnt,
            country_code,
            dt,
            hour
        from opay_dw.app_opay_bd_agent_report_ng_h
        where 
        country_code = 'NG'
        
    -- 上一个小时 
    --and concat(dt,' ',hour) >= date_format(default.localTime("{config}", 'NG', '{v_date}', -1), 'yyyy-MM-dd HH')
    --当前小时
    
    and concat(dt,' ',hour) = date_format(default.localTime("{config}", 'NG', '{v_date}', 0), 'yyyy-MM-dd HH')

    '''.format(pt=ds,
               v_date=execution_date.strftime("%Y-%m-%d %H:%M:%S"),
               table=table_name,
               db=db_name,
               config=config)

    logging.info(hql)
    hive_cursor = get_hive_cursor()
    try:
        hive_cursor.execute(hql)
        hive_data = hive_cursor.fetchall()

        mysql_conn = get_db_conn('app_ali_bi_mysql')
        try:
            mcursor = mysql_conn.cursor()
            try:
                #__data_only_mysql(
                #    mcursor,
                #    execution_date
                #)

                __data_to_mysql(mcursor, hive_data, [
                    'create_date_hour', 'bd_admin_user_id', 'bd_admin_user_name',
                    'bd_admin_user_mobile', 'bd_admin_dept_id', 'bd_admin_job_id',
                    'bd_admin_leader_id', 'audited_agent_cnt', 'rejected_agent_cnt',
                    'ci_suc_order_cnt', 'ci_suc_order_amt', 'co_suc_order_cnt',
                    'co_suc_order_amt', 'pos_suc_amt', 'pos_suc_cnt', 'country_code', 'dt',
                    'hour'
                ])
            finally:
                mcursor.close()
        finally:
            # The connection used to be left open for the GC to reclaim.
            mysql_conn.close()
    finally:
        hive_cursor.close()
コード例 #21
0
def get_data_from_impala(**op_kwargs):
    """Build the per-driver daily obus report and load it into MySQL.

    For the partition date ``ds`` the query joins three CTEs: driver master
    data (with the cycle name), per-driver online hours derived from the work
    log, and per-driver order count / GMV for orders with status 1 or 2.
    A ``row_number()`` ordered by driver id is appended as ``num``.  The rows
    are passed to ``__data_to_mysql`` with the target column names and the
    update clause used for existing rows.

    Both cursors are closed even when the query or the load step raises.

    Args:
        **op_kwargs: ``ds`` (``YYYY-MM-DD``) selects the partition; when it
            is absent, the local date 86400 seconds ago is used.
    """
    ds = op_kwargs.get(
        'ds', time.strftime('%Y-%m-%d', time.localtime(time.time() - 86400)))
    sql = '''
        WITH
        --司机数据
        driver_data as 
        (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                dd.id as driver_id,
                dd.real_name as driver_name,                                    --司机名字
                dd.phone_number as driver_phone,                                --司机电话
                dd.plate_number as driver_bus_number,                           --车牌号
                dd.cycle_id,                                                    ---环线代号
                cc.name as cycle_name,                                           --所属线路
                0 as number_of_seats                                            --座位数
            from (select 
                    id,
                    real_name,
                    phone_number,
                    plate_number,
                    cycle_id
                from obus_dw_ods.ods_sqoop_data_driver_df 
                where dt='{pt}'
                ) as dd 
            left join (select 
                    id,
                    `name`
                from obus_dw_ods.ods_sqoop_conf_cycle_df 
                where dt='{pt}'
                ) as cc 
            on dd.cycle_id = cc.id
        ),
        --工作数据
        work_data as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                dr.id as driver_id,
                sum(if(dw.serv_mode=1 and dw.serv_mode1=0, round(abs(dw.create_time2-dw.create_time)/3600,2), 0)) as work_dur             --司机今日在线时长(小时)
            from 
                (select 
                    driver_id,
                    serv_mode,
                    create_time,
                    lead(serv_mode,1,0) over(partition by driver_id order by create_time) serv_mode1,
                    lead(create_time,1,unix_timestamp('{pt} 23:59:59','yyyy-MM-dd HH:mm:ss')) over(partition by driver_id order by create_time) create_time2
                from obus_dw_ods.ods_sqoop_data_driver_work_log_df 
                where dt='{pt}' and 
                    from_unixtime(create_time, 'yyyy-MM-dd')='{pt}'
                ) as dw 
            join (select 
                    id
                from obus_dw_ods.ods_sqoop_data_driver_df 
                where dt='{pt}'
                ) as dr 
            on dw.driver_id = dr.id 
            group by dr.id
        ),
        --订单数据
        order_data as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                driver_id,
                count(1) as orders,                                         ---本日已经完成的订单数
                sum(price) as mtd_gmv_today                                 ---本日累计交易额
            from obus_dw_ods.ods_sqoop_data_order_df 
            where dt='{pt}' and 
                from_unixtime(create_time, 'yyyy-MM-dd') = '{pt}' and 
                status in (1,2)
            group by driver_id
        )

        --结果集
        select 
            *,
            row_number() over(partition by null order by driver_id) num
        from 
            (select 
                driver_data.dt,
                driver_data.driver_id,
                driver_data.driver_name,
                driver_data.driver_phone,
                driver_data.driver_bus_number,
                driver_data.cycle_id,
                nvl(driver_data.cycle_name, ''),
                driver_data.number_of_seats,
                IF(work_data.work_dur IS NULL, 0, work_data.work_dur),
                IF(order_data.orders IS NULL, 0, order_data.orders),
                IF(order_data.mtd_gmv_today IS NULL, 0, order_data.mtd_gmv_today)
            from driver_data
            left join work_data on driver_data.dt=work_data.dt and 
                                    driver_data.driver_id=work_data.driver_id 
            left join order_data on driver_data.dt = order_data.dt and 
                                    driver_data.driver_id = order_data.driver_id 
            ) as t
    '''.format(pt=ds)

    # NOTE(review): the SELECT above emits `num` as the LAST column, while
    # this list names it second.  Presumably __data_to_mysql maps row fields
    # to columns itself; confirm against that helper before reordering.
    columns = [
        'dt', 'num', 'driver_id', 'driver_name', 'driver_phone',
        'driver_bus_number', 'cycle_id', 'cycle_name', 'number_of_seats',
        'mtd_serv_time_today', 'finished_orders_today', 'mtd_gmv_today'
    ]
    # Assignment list applied to rows that already exist (every column
    # except `dt` and `driver_id`).
    update_clause = '''
                        num=values(num),
                        driver_name=values(driver_name),
                        driver_phone=values(driver_phone),
                        driver_bus_number=values(driver_bus_number),
                        cycle_id=values(cycle_id),
                        cycle_name=values(cycle_name),
                        number_of_seats=values(number_of_seats),
                        mtd_serv_time_today=values(mtd_serv_time_today),
                        finished_orders_today=values(finished_orders_today),
                        mtd_gmv_today=values(mtd_gmv_today)
                    '''

    logging.info(sql)
    hive_cursor = get_hive_cursor()
    try:
        hive_cursor.execute(sql)
        result = hive_cursor.fetchall()

        mysql_conn = get_db_conn('mysql_bi')
        mcursor = mysql_conn.cursor()
        try:
            __data_to_mysql(mcursor, result, columns, update_clause)
        finally:
            mcursor.close()
    finally:
        hive_cursor.close()
コード例 #22
0
def opayspreadCount(**op_kwargs):
    """Aggregate one day of order metrics for OPay-spread drivers into MySQL.

    The driver roster returned by ``__getOpaySpreadDrivers`` is left-merged
    on ``driver_id`` with the order metrics returned by ``__getOrideOrders``
    for the chosen day (both are presumably pandas DataFrames; they are only
    used through ``merge``/``groupby`` here).  Totals are then computed per
    (city, group_name) and per (city, group_name, team_id) and each result
    set is written through ``__dataToMysql``.

    The day is chosen as follows:

    * ``test_mode`` truthy: local midnight of ``ds`` (default: today).
    * otherwise: the 86400-second epoch-aligned day that contains the
      previous 10-minute slot.

    The MySQL connection is closed on every exit path, including errors.

    Args:
        **op_kwargs: Optional ``test_mode`` (bool) and ``ds``
            (``YYYY-MM-DD``, only read in test mode).

    Raises:
        ValueError: If either data helper returns ``None``.
    """
    test_mode = op_kwargs.get('test_mode', False)
    if test_mode:
        ds = op_kwargs.get(
            'ds', time.strftime('%Y-%m-%d', time.localtime(time.time())))
        prev_day_start = int(
            time.mktime(datetime.strptime(ds, '%Y-%m-%d').timetuple()))
    else:
        prev_timepoint = math.floor(int(time.time()) / 600) * 600 - 600
        prev_day_start = math.floor(prev_timepoint / 86400) * 86400
    prev_day_end = prev_day_start + 86400

    # Value written to the `daily` column of both result sets.
    daily = time.strftime('%Y-%m-%d 00:00:00', time.localtime(prev_day_start))

    group_columns = [
        'daily', 'city', 'group_name', 'drivers', 'teams', 'ordertakes',
        'orderfinishs', 'orderarrives', 'drivertakes', 'driverfinishs',
        'driverarrives', 'driver5arrives'
    ]
    group_update = '''
        teams=values(teams), drivers=values(drivers), ordertakes=values(ordertakes), orderfinishs=values(orderfinishs), 
        orderarrives=values(orderarrives), drivertakes=values(drivertakes), driverfinishs=values(driverfinishs), 
        driverarrives = values(driverarrives), driver5arrives=values(driver5arrives)
        '''
    team_columns = [
        'daily', 'city', 'group_name', 'team_id', 'team_name', 'drivers',
        'ordertakes', 'orderfinishs', 'orderarrives', 'drivertakes',
        'driverfinishs', 'driverarrives', 'driver5arrives'
    ]
    team_update = '''
        drivers=values(drivers), ordertakes=values(ordertakes), orderfinishs=values(orderfinishs), 
        orderarrives=values(orderarrives), drivertakes=values(drivertakes), driverfinishs=values(driverfinishs), 
        driverarrives = values(driverarrives), driver5arrives=values(driver5arrives)
        '''

    bidbconn = get_db_conn('mysql_bi')
    try:
        bidb = bidbconn.cursor()

        driver_orders = __getOrideOrders(prev_day_start, prev_day_end)
        driver_framework = __getOpaySpreadDrivers()

        if driver_orders is None or driver_framework is None:
            raise ValueError('get orders or groups error')

        # Left merge keeps every roster driver, even those without orders.
        results = driver_framework.merge(driver_orders,
                                         how='left',
                                         on=['driver_id'])

        # Totals per city and group.
        group_results = results.groupby(['city', 'group_name']).agg(
            OrderedDict([('city', 'min'), ('group_name', 'min'),
                         ('driver_id', 'count'), ('team_id', 'nunique'),
                         ('ordertakes', 'sum'), ('orderfinishs', 'sum'),
                         ('orderarrives', 'sum'), ('drivertakes', 'sum'),
                         ('driverfinishs', 'sum'), ('driverarrives', 'sum'),
                         ('driver5arrives', 'sum')]))
        # Persist the group-level totals.
        __dataToMysql(daily, bidb, group_results.values.tolist(),
                      group_columns, group_update)

        # Totals per city, group and team.
        team_results = results.groupby(['city', 'group_name', 'team_id']).agg(
            OrderedDict([('city', 'min'), ('group_name', 'min'),
                         ('team_id', 'min'), ('team_name', 'max'),
                         ('driver_id', 'count'), ('ordertakes', 'sum'),
                         ('orderfinishs', 'sum'), ('orderarrives', 'sum'),
                         ('drivertakes', 'sum'), ('driverfinishs', 'sum'),
                         ('driverarrives', 'sum'), ('driver5arrives', 'sum')]))
        # Persist the team-level totals.
        __dataToMysql(daily, bidb, team_results.values.tolist(),
                      team_columns, team_update)
    finally:
        # Previously skipped whenever anything above raised.
        bidbconn.close()
コード例 #23
0
def data_monitor(**op_kwargs):
    """Sanity-check the latest 10-minute order metrics and alert on anomalies.

    After sleeping five minutes the function inspects table
    ``oride_orders_status_10min`` for the previous 10-minute slot and sends
    at most one message through ``ComwxApi.postAppMessage``, returning as
    soon as an alert has been sent.  Checks, in order:

    1. The row with ``city_id == 0`` and ``serv_type == -1`` has a metric
       sum ``<= 0``.
    2. Fewer rows (with ``city_id < 999000``) exist than
       ``(city count + 1) * 5``.
    3. For rows with ``serv_type == -1`` in the last slots, orders, serving
       drivers or orderable drivers dropped sharply against the same slot
       one week earlier.

    Both database connections are closed on every exit path.

    Args:
        **op_kwargs: Unused.
    """
    # Wait five minutes before checking (presumably so upstream data has
    # landed; confirm with the job owner).
    time.sleep(300)
    # Start of the previous, fully elapsed 10-minute slot (epoch seconds).
    prev_timepoint = math.floor(int(time.time()) / 600) * 600 - 600
    prev_timestr = time.strftime('%Y-%m-%d %H:%M:00',
                                 time.localtime(prev_timepoint))

    # Number of configured cities.
    city_sql = '''
        select count(distinct id) from data_city_conf where id < 999000
    '''

    # Sum of all metric columns per row of the slot being checked.
    metrics_sql = '''
        select 
            city_id, city_name, serv_type, order_time, (orders+orders_user+orders_pick+drivers_serv+drivers_orderable+orders_finish+
            avg_pick+avg_take+not_sys_cancel_orders+picked_orders+orders_accept+agg_orders_finish) as total 
        from oride_orders_status_10min where order_time = '{}'
    '''.format(prev_timestr)

    # Recent slots joined with the same slots one week earlier.
    weekly_diff = '''
        select 
            t1.city_id, 
            t1.city_name,
            t1.serv_type, 
            t1.order_time, 
            t1.orders as t1orders,
            if(isnull(t2.orders) or t2.orders<=0, 0, t2.orders) as t2orders,
            t1.orders_user as t1ousers,
            if(isnull(t2.orders_user) or t2.orders_user<=0, 0, t2.orders_user) as t2ousers,
            t1.orders_pick as t1opicks,
            if(isnull(t2.orders_pick) or t2.orders_pick<=0, 0, t2.orders_pick) as t2opicks,
            t1.drivers_serv as t1dservs,
            if(isnull(t2.drivers_serv) or t2.drivers_serv<=0, 0, t2.drivers_serv) as t2dservs,
            t1.drivers_orderable as t1doables,
            if(isnull(t2.drivers_orderable) or t2.drivers_orderable<=0, 0, t2.drivers_orderable) as t2doables,
            t1.orders_finish as t1ofs,
            if(isnull(t2.orders_finish) or t2.orders_finish<=0, 0, t2.orders_finish) as t2ofs,
            t1.avg_pick as t1apicks,
            if(isnull(t2.avg_pick) or t2.avg_pick<=0, 0, t2.avg_pick) as t2apicks,
            t1.avg_take as t1atakes,
            if(isnull(t2.avg_take) or t2.avg_take<=0, 0, t2.avg_take) as t2atakes,
            t1.not_sys_cancel_orders as t1norders,
            if(isnull(t2.not_sys_cancel_orders) or t2.not_sys_cancel_orders<=0, 0, t2.not_sys_cancel_orders) as t2norders,
            t1.picked_orders as t1pos,
            if(isnull(t2.picked_orders) or t2.picked_orders<=0, 0, t2.picked_orders) as t2pos,
            t1.agg_orders_finish as t1aofs,
            if(isnull(t2.agg_orders_finish) or t2.agg_orders_finish<=0, 0, t2.agg_orders_finish) as t2aofs
        from
            (select * from oride_orders_status_10min where order_time>=from_unixtime({dsb2})) t1 
        left join 
            (select * from oride_orders_status_10min where order_time>=from_unixtime({dsb7}) and order_time<=from_unixtime({dsb7a3})) t2 
        on 
            t1.city_id = t2.city_id and 
            t1.serv_type = t2.serv_type and 
            t1.order_time = date_format(from_unixtime(unix_timestamp(t2.order_time)+86400*7), '%Y-%m-%d %H:%i:00')
    '''.format(dsb2=prev_timepoint - 1200,
               dsb7=prev_timepoint - 1200 - 86400 * 7,
               dsb7a3=prev_timepoint - 86400 * 7)

    def _dropped(current, previous, ratio_floor, small_ceiling, small_diff):
        """Return True when `current` fell sharply against `previous`.

        Large baselines (``previous >= ratio_floor``) trip on a relative
        drop above 80%; small ones (``0 < previous < small_ceiling``) trip
        on an absolute drop above ``small_diff``.
        """
        if previous >= ratio_floor and previous > current and \
                (previous - current) / previous > 0.8:
            return True
        return 0 < previous < small_ceiling and \
            (previous - current) > small_diff

    bidbconn = get_db_conn('mysql_bi')
    try:
        oride_db_conn = get_db_conn('sqoop_db')
        try:
            oridedb = oride_db_conn.cursor()
            oridedb.execute(city_sql)
            (city_cnt, ) = oridedb.fetchone()
        finally:
            # Only needed for the city count.
            oride_db_conn.close()
        # Expected number of metric rows per slot; the meaning of the
        # factor 5 is not visible here (TODO confirm).
        total_count = (int(city_cnt) + 1) * 5

        # NOTE(review): these literals look like API credentials committed
        # to source; consider loading them from configuration instead.
        comwx = ComwxApi('wwd26d45f97ea74ad2',
                         'BLE_v25zCmnZaFUgum93j3zVBDK-DjtRkLisI_Wns4g',
                         '1000011')

        bidb = bidbconn.cursor()

        # Checks 1 and 2: all-zero summary row, then missing rows.
        logging.info(metrics_sql)
        bidb.execute(metrics_sql)
        metrics_cnt = 0
        for (city_id, city_name, serv_type, order_time,
             total) in bidb.fetchall():
            if city_id >= 999000:
                continue
            metrics_cnt += 1
            if city_id == 0 and serv_type == -1 and total <= 0:
                comwx.postAppMessage(
                    '{0}[{1}]10分钟数据{2}数据记录指标全部为0异常,请及时排查,谢谢'.format(
                        city_name, serv_type, order_time), '271')
                return

        if metrics_cnt < total_count:
            comwx.postAppMessage(
                '10分钟数据{0}数据记录缺失异常({1}<{2}),请及时排查,谢谢'.format(
                    prev_timestr, metrics_cnt, total_count), '271')
            return

        # Check 3: compare recent slots with the same slots a week earlier.
        logging.info(weekly_diff)
        bidb.execute(weekly_diff)
        for (city_id, city_name, serv_type, order_time, t1orders, t2orders,
             t1ousers, t2ousers, t1opicks, t2opicks, t1dservs, t2dservs,
             t1doables, t2doables, t1ofs, t2ofs, t1apicks, t2apicks, t1atakes,
             t2atakes, t1norders, t2norders, t1pos, t2pos, t1aofs,
             t2aofs) in bidb.fetchall():
            # Every comparison below applies only to serv_type == -1 rows.
            if serv_type != -1:
                continue

            if _dropped(t1orders, t2orders, 100, 100, 40):
                comwx.postAppMessage(
                    '{0}[{1}]10分钟数据{2}下单数记录与上周同期对比异常,请及时排查,谢谢'.format(
                        city_name, serv_type, order_time), '271')
                return

            # NOTE(review): for the two driver metrics the relative check
            # starts at 200 while the absolute check stops below 100, so
            # baselines in [100, 200) are never checked.  Thresholds are
            # kept exactly as they were; confirm whether 200 was intended
            # for the upper bound too.
            if _dropped(t1dservs, t2dservs, 200, 100, 80):
                comwx.postAppMessage(
                    '{0}[{1}]10分钟数据{2}在线司机数记录与上周同期对比异常,请及时排查,谢谢'.format(
                        city_name, serv_type, order_time), '271')
                return

            if _dropped(t1doables, t2doables, 200, 100, 80):
                comwx.postAppMessage(
                    '{0}[{1}]10分钟数据{2}可接单司机数记录与上周同期对比异常,请及时排查,谢谢'.format(
                        city_name, serv_type, order_time), '271')
                return
    finally:
        bidbconn.close()
コード例 #24
0
def get_data_from_impala(**op_kwargs):
    """Build the per-city daily obus overview and load it into MySQL.

    For the partition date ``ds`` the query computes, once per city and once
    for all cities together (``city_id`` 0), the following groups of
    metrics as CTEs: line/driver counts, order counts and GMV, station
    counts, new users, app/wallet users, first-time ticket users, wallet
    recharge figures and bound tickets.  The per-city and all-city variants
    are unioned, left-joined onto the line/driver figures and decorated with
    the city name.  The rows are passed to ``__data_to_mysql`` with the
    target column names (in SELECT order) and the update clause used for
    existing rows.

    Both cursors are closed even when the query or the load step raises.

    Args:
        **op_kwargs: ``ds`` (``YYYY-MM-DD``) selects the partition; when it
            is absent, the local date 86400 seconds ago is used.
    """
    ds = op_kwargs.get('ds', time.strftime('%Y-%m-%d', time.localtime(time.time()-86400)))
    # NOTE(review): both cycle CTEs read ods_sqoop_conf_cycle_df with the
    # fixed partition dt='2019-08-17' instead of '{pt}'; confirm whether
    # that pin is intentional.
    # NOTE(review): cycle_data compares serv_mode/status with quoted
    # literals ('1', '0') while cycle_data_all uses unquoted ones; confirm
    # the column types so both variants filter identically.
    sql = '''
        WITH
        --分城市 
        cycle_data as 
        (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                cy.city_id,
                count(distinct cy.id) as total_lines,                                           --总线路数
                count(distinct dr.id) as total_drivers,                                         --线路总司机数
                count(distinct if(serv_mode='1', dr.id, null)) as serv_drivers,                 --线路上司机数量
                count(distinct if(serv_mode='0', dr.id, null)) as no_serv_drivers               --线路下司机数量
            from (select 
                    cycle_id, 
                    id, 
                    serv_mode 
                from obus_dw_ods.ods_sqoop_data_driver_df 
                where dt='{pt}' and 
                    from_unixtime(login_time, 'yyyy-MM-dd') = '{pt}'
                ) as dr
            inner join (select 
                    id,
                    city_id
                from obus_dw_ods.ods_sqoop_conf_cycle_df 
                where dt='2019-08-17' and 
                    status = '0'
                ) as cy 
            on dr.cycle_id = cy.id 
            group by cy.city_id
        ),
        --不分城市
        cycle_data_all as 
        (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                0 as city_id,
                count(distinct cy.id) as total_lines,                                           --总线路数
                count(distinct dr.id) as total_drivers,                                         --线路总司机数
                count(distinct if(serv_mode=1, dr.id, null)) as serv_drivers,                 --线路上司机数量
                count(distinct if(serv_mode=0, dr.id, null)) as no_serv_drivers               --线路下司机数量
            from (select 
                    cycle_id, 
                    id, 
                    serv_mode 
                from obus_dw_ods.ods_sqoop_data_driver_df 
                where dt='{pt}' and 
                    from_unixtime(login_time, 'yyyy-MM-dd') = '{pt}'
                ) as dr
            inner join (select 
                    id,
                    city_id
                from obus_dw_ods.ods_sqoop_conf_cycle_df 
                where dt='2019-08-17' and 
                    status = 0
                ) as cy 
            on dr.cycle_id = cy.id 
        ),
        --分城市
        order_data as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                city_id,
                count(1) as line_orders,                                                                --线路总下单数
                sum(if(status in (1,2), 1, 0)) as line_finished_orders,                                  --线路总完单数
                sum(if(status in (1,2), price, 0)) as line_gmv                                          --线路收益
            from obus_dw_ods.ods_sqoop_data_order_df 
            where dt='{pt}' and 
                from_unixtime(cast(create_time as bigint), 'yyyy-MM-dd') = '{pt}'
            group by city_id
        ),
        --不分城市
        order_data_all as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                0 as city_id,
                count(1) as line_orders,
                sum(if(status in (1,2), 1, 0)) as line_finished_orders,
                sum(if(status in (1,2), price, 0)) as line_gmv
            from obus_dw_ods.ods_sqoop_data_order_df 
            where dt='{pt}' and 
                from_unixtime(cast(create_time as bigint), 'yyyy-MM-dd') = '{pt}'
        ),
        --分城市
        station_data as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                city_id,
                count(distinct id) as total_stations                                                          --总站点数
            from obus_dw_ods.ods_sqoop_conf_station_df 
            where dt='{pt}' 
            group by city_id
        ),
        --不分城市
        station_data_all as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                0 as city_id,
                count(distinct id) as total_stations
            from obus_dw_ods.ods_sqoop_conf_station_df 
            where dt='{pt}' 
        ),
        --分城市
        users_data as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                city_id,
                count(1) as users                                                                           --新用户数量
            from (select 
                    city_id,
                    user_id,
                    create_time,
                    row_number() over(partition by user_id order by arrive_time) orders
                from obus_dw_ods.ods_sqoop_data_order_df 
                where dt='{pt}' and 
                    status in (1,2) and 
                    user_id > 0
                ) as t
            where from_unixtime(t.create_time, 'yyyy-MM-dd')='{pt}' and 
                orders=1 
            group by t.city_id
        ),
        --不分城市
        users_data_all as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                0 as city_id,
                count(1) as users                                                                           --新用户数量
            from (select 
                    city_id,
                    user_id,
                    create_time,
                    row_number() over(partition by user_id order by arrive_time) orders
                from obus_dw_ods.ods_sqoop_data_order_df 
                where dt='{pt}' and 
                    status in (1,2) and 
                    user_id > 0
                ) as t
            where from_unixtime(t.create_time, 'yyyy-MM-dd')='{pt}' and 
                orders=1 
        ),
        --分城市
        app_users_data as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                do.city_id,
                sum(if(dp.mode=1 and do.orders=1, 1, 0)) as obusapp_new_users,                                              ---ObusAPP新用户数量
                count(distinct if(dp.mode=1, do.user_id, null)) as money_ballet_users                                       --今日钱包使用人数
            from (select 
                    id,
                    city_id,
                    create_time,
                    user_id,
                    row_number() over(partition by user_id order by arrive_time) orders
                from obus_dw_ods.ods_sqoop_data_order_df 
                where dt='{pt}' and 
                    status in (1,2) and 
                    user_id > 0
                ) as do 
            join (select 
                    id,
                    mode
                from obus_dw_ods.ods_sqoop_data_order_payment_df 
                where dt='{pt}' and 
                    from_unixtime(create_time, 'yyyy-MM-dd')='{pt}'
                ) as dp 
            on do.id = dp.id 
            where from_unixtime(do.create_time, 'yyyy-MM-dd') = '{pt}'
            group by do.city_id 
        ),
        --不分城市
        app_users_data_all as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                0 as city_id,
                sum(if(dp.mode=1 and do.orders=1, 1, 0)) as obusapp_new_users,                                              ---ObusAPP新用户数量
                count(distinct if(dp.mode=1, do.user_id, null)) as money_ballet_users                                       --今日钱包使用人数
            from (select 
                    id,
                    city_id,
                    create_time,
                    user_id,
                    row_number() over(partition by user_id order by arrive_time) orders
                from obus_dw_ods.ods_sqoop_data_order_df 
                where dt='{pt}' and 
                    status in (1,2) and 
                    user_id > 0
                ) as do 
            join (select 
                    id,
                    mode
                from obus_dw_ods.ods_sqoop_data_order_payment_df 
                where dt='{pt}' and 
                    from_unixtime(create_time, 'yyyy-MM-dd')='{pt}'
                ) as dp 
            on do.id = dp.id 
            where from_unixtime(do.create_time, 'yyyy-MM-dd') = '{pt}'
        ),
        --分城市
        app_ticket_data as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                do.city_id,
                sum(if(dp.mode=2 and do.orders=1, 1, 0)) as ticket_new_users                                                 ---首次使用公交卡新用户数量
            from (select 
                    id,
                    city_id,
                    create_time,
                    ticket_id,
                    row_number() over(partition by ticket_id order by arrive_time) orders
                from obus_dw_ods.ods_sqoop_data_order_df 
                where dt='{pt}' and 
                    status in (1,2) and 
                    ticket_id > 0
                ) as do 
            join (select 
                    id,
                    ticket_id,
                    mode
                from obus_dw_ods.ods_sqoop_data_order_payment_df 
                where dt='{pt}' and 
                    from_unixtime(create_time, 'yyyy-MM-dd')='{pt}'
                ) as dp 
            on do.id = dp.id and do.ticket_id = dp.ticket_id 
            where from_unixtime(do.create_time, 'yyyy-MM-dd') = '{pt}'
            group by do.city_id 
        ),
        --不分城市
        app_ticket_data_all as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                0 as city_id,
                sum(if(dp.mode=2 and do.orders=1, 1, 0)) as ticket_new_users                                                ---首次使用公交卡新用户数量
            from (select 
                    id,
                    city_id,
                    create_time,
                    ticket_id,
                    row_number() over(partition by ticket_id order by arrive_time) orders
                from obus_dw_ods.ods_sqoop_data_order_df 
                where dt='{pt}' and 
                    status in (1,2) and 
                    ticket_id > 0
                ) as do 
            join (select 
                    id,
                    ticket_id,
                    mode
                from obus_dw_ods.ods_sqoop_data_order_payment_df 
                where dt='{pt}' and 
                    from_unixtime(create_time, 'yyyy-MM-dd')='{pt}'
                ) as dp 
            on do.id = dp.id and do.ticket_id = dp.ticket_id 
            where from_unixtime(do.create_time, 'yyyy-MM-dd') = '{pt}'
        ),
        --分城市
        recharge_data as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                du.city_id,
                count(distinct if(rc.status=1 and from_unixtime(rc.create_time,'yyyy-MM-dd')='{pt}', rc.user_id, null)) as recharge_users,              --用户钱包充值人数
                count(distinct rc.user_id) as online_uv,                                                                                                --用户钱包总数量=线上uv
                sum(if(rc.status=1 and rc.recharge=1 and from_unixtime(rc.create_time,'yyyy-MM-dd')='{pt}', 1, 0)) as money_ballet_recharge_users       --今日钱包新充值人数
            from (select 
                    user_id,
                    status,
                    create_time, 
                    row_number() over(partition by user_id order by create_time) recharge
                from obus_dw_ods.ods_sqoop_data_user_recharge_df 
                where dt='{pt}' and 
                    user_id > 0
                ) as rc 
            join (select 
                    city_id,
                    id 
                from obus_dw_ods.ods_sqoop_data_user_df 
                where dt='{pt}'
                ) as du 
            on rc.user_id = du.id 
            group by du.city_id 
        ),
        --不分城市
        recharge_data_all as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                0 as city_id,
                count(distinct if(rc.status=1 and from_unixtime(rc.create_time,'yyyy-MM-dd')='{pt}', rc.user_id, null)) as recharge_users,              --用户钱包充值人数
                count(distinct rc.user_id) as online_uv,                                                                                                --用户钱包总数量=线上uv
                sum(if(rc.status=1 and rc.recharge=1 and from_unixtime(rc.create_time,'yyyy-MM-dd')='{pt}', 1, 0)) as money_ballet_recharge_users       --今日钱包新充值人数
            from (select 
                    user_id,
                    status,
                    create_time, 
                    row_number() over(partition by user_id order by create_time) recharge
                from obus_dw_ods.ods_sqoop_data_user_recharge_df 
                where dt='{pt}' and 
                    user_id > 0
                ) as rc 
        ),
        --分城市
        ticket_data as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                city_id,
                count(1) as tied_tickets                                                --绑卡数
            from obus_dw_ods.ods_sqoop_data_ticket_df 
            where dt='{pt}' and 
                status=0 and 
                from_unixtime(bind_time, 'yyyy-MM-dd') = '{pt}' 
            group by city_id
        ),
        --不分城市
        ticket_data_all as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                0 as city_id,
                count(1) as tied_tickets                                                --绑卡数
            from obus_dw_ods.ods_sqoop_data_ticket_df 
            where dt='{pt}' and 
                status=0 and 
                from_unixtime(bind_time, 'yyyy-MM-dd') = '{pt}' 
        )
        --结果集
        select 
            cycle_data.dt,
            cycle_data.city_id,
            nvl(dc.name,''),
            cycle_data.total_lines,
            cycle_data.total_drivers,
            cycle_data.serv_drivers,
            cycle_data.no_serv_drivers,
            IF(order_data.line_orders IS NULL, 0, order_data.line_orders),
            IF(order_data.line_finished_orders IS NULL, 0, order_data.line_finished_orders),
            IF(order_data.line_gmv IS NULL, 0, order_data.line_gmv),
            IF(station_data.total_stations IS NULL, 0, station_data.total_stations),
            IF(users_data.users IS NULL, 0, users_data.users),
            IF(app_users_data.obusapp_new_users IS NULL, 0, app_users_data.obusapp_new_users),
            IF(app_ticket_data.ticket_new_users IS NULL, 0, app_ticket_data.ticket_new_users),
            IF(app_users_data.money_ballet_users IS NULL, 0, app_users_data.money_ballet_users),
            IF(recharge_data.recharge_users IS NULL, 0, recharge_data.recharge_users),
            IF(recharge_data.online_uv IS NULL, 0, recharge_data.online_uv),
            IF(recharge_data.money_ballet_recharge_users IS NULL, 0, recharge_data.money_ballet_recharge_users),
            IF(ticket_data.tied_tickets IS NULL, 0, ticket_data.tied_tickets) 
        from (select * from cycle_data union select * from cycle_data_all) as cycle_data 
        left join (select * from order_data union select * from order_data_all) as order_data 
            on cycle_data.dt = order_data.dt and cycle_data.city_id=order_data.city_id 
        left join (select * from station_data union select * from station_data_all) as station_data 
            on station_data.dt = cycle_data.dt and station_data.city_id = cycle_data.city_id 
        left join (select * from users_data union select * from users_data_all) as users_data 
            on users_data.dt = cycle_data.dt and users_data.city_id = cycle_data.city_id 
        left join (select * from app_users_data union select * from app_users_data_all) as app_users_data 
            on app_users_data.dt = cycle_data.dt and app_users_data.city_id = cycle_data.city_id 
        left join (select * from recharge_data union select * from recharge_data_all) as recharge_data 
            on recharge_data.dt = cycle_data.dt and recharge_data.city_id = cycle_data.city_id 
        left join (select * from ticket_data union select * from ticket_data_all) as ticket_data 
            on ticket_data.dt = cycle_data.dt and ticket_data.city_id = cycle_data.city_id 
        left join (select * from app_ticket_data union select * from app_ticket_data_all) as app_ticket_data 
            on app_ticket_data.dt = cycle_data.dt and app_ticket_data.city_id = cycle_data.city_id 
        left join (select id, name from obus_dw_ods.ods_sqoop_conf_city_df where dt='{pt}' and validate=1) as dc 
            on cycle_data.city_id = dc.id
            
    '''.format(
        pt=ds
    )

    # Target column names, in the same order as the final SELECT list.
    columns = [
        'dt', 'city_id', 'city', 'total_lines_double', 'total_drivers',
        'serv_drivers', 'no_serv_drivers', 'lines_orders_double',
        'lines_finished_orders_double', 'line_gmv_double', 'total_stations',
        'new_users', 'obusapp_new_users', 'ticket_new_users',
        'money_ballet_users', 'recharge_users', 'online_uv',
        'money_ballet_recharge_users', 'tied_cards'
    ]
    # Assignment list applied to rows that already exist; `dt`, `city_id`
    # and `city` are not part of it.
    update_clause = '''
                    total_lines_double=values(total_lines_double),
                    total_drivers=values(total_drivers),
                    serv_drivers=values(serv_drivers),
                    no_serv_drivers=values(no_serv_drivers),
                    lines_orders_double=values(lines_orders_double),
                    lines_finished_orders_double=values(lines_finished_orders_double),
                    total_stations=values(total_stations),
                    line_gmv_double=values(line_gmv_double),
                    new_users=values(new_users),
                    obusapp_new_users=values(obusapp_new_users),
                    ticket_new_users=values(ticket_new_users),
                    recharge_users=values(recharge_users),
                    online_uv=values(online_uv),
                    money_ballet_users=values(money_ballet_users),
                    tied_cards=values(tied_cards),
                    money_ballet_recharge_users=values(money_ballet_recharge_users)
                '''

    logging.info(sql)
    hive_cursor = get_hive_cursor()
    try:
        hive_cursor.execute(sql)
        result = hive_cursor.fetchall()

        mysql_conn = get_db_conn('mysql_bi')
        mcursor = mysql_conn.cursor()
        try:
            __data_to_mysql(mcursor, result, columns, update_clause)
        finally:
            mcursor.close()
    finally:
        hive_cursor.close()
コード例 #25
0
def get_data_from_impala(**op_kwargs):
    """Run the per-station daily report query and pass the rows to ``__data_to_mysql``.

    The query starts from the station configuration and left-joins line,
    driver, line-order, station-order, new-user, get-off-user and city data
    for a single partition date.

    Keyword Args:
        ds: partition date as ``YYYY-MM-DD``. When absent, the local date
            86400 seconds before "now" is used.

    Both cursors are closed even when the query or the MySQL write raises.
    """
    ds = op_kwargs.get('ds', time.strftime('%Y-%m-%d', time.localtime(time.time() - 86400)))
    sql = '''
        WITH
        --线路数据
        line_data as 
        (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                city_id,
                id,
                name                                    --line_name
            from obus_dw_ods.ods_sqoop_conf_line_df 
            where dt='{pt}'   
        ),
        --站点数据
        station_data as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                cs.city_id,
                cls.line_id,
                cs.id,                                  --站点ID
                cs.name                                 --站点名
            from (select 
                    id,
                    city_id,
                    name 
                from obus_dw_ods.ods_sqoop_conf_station_df 
                where dt='{pt}'
                ) as cs 
            left join (select 
                    line_id,
                    station_id 
                from obus_dw_ods.ods_sqoop_conf_line_stations_df 
                where dt='{pt}') as cls 
            on cs.id = cls.station_id
        ),
        --司机数据
        driver_data as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                cl.city_id,
                cl.id,
                cl.start_station,
                count(1) as total_drivers,                          --司机总人数
                sum(if(dd.serv_mode=1, 1, 0)) as serv_drivers,      --上班司机数
                sum(if(dd.serv_status=1 and dd.serv_mode=1, 1, 0)) as serv_on_the_road_drivers,     --上班行驶司机数
                sum(if(dd.serv_mode=1 and dd.serv_status in (0,2), 1, 0)) as serv_idle_drivers,     --上班未行驶司机数
                sum(if(dd.serv_mode=0, 1, 0)) as no_serv_drivers           --下班司机数
            from (select 
                    id,
                    city_id,
                    start_station
                from obus_dw_ods.ods_sqoop_conf_line_df 
                where dt='{pt}'
                ) as cl 
            join (select 
                    id,
                    line_id,
                    serv_mode,
                    serv_status
                from obus_dw_ods.ods_sqoop_data_driver_df 
                where dt='{pt}' and 
                    from_unixtime(login_time, 'yyyy-MM-dd')='{pt}'
                ) as dd 
            on cl.id = dd.line_id 
            group by cl.city_id, cl.id, cl.start_station
        ),
        --线路订单数据
        line_order_data as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                city_id,
                line_id,
                count(1) as lines_orders,                       ---线路总订单数
                sum(if(status in (1,2), 1, 0)) as lines_finished_orders,     ---线路总完单数
                sum(if(status in (1,2), price, 0)) as line_gmv_single       --线路收益(单)
            from obus_dw_ods.ods_sqoop_data_order_df 
            where dt='{pt}' and 
                from_unixtime(create_time, 'yyyy-MM-dd') = '{pt}'
            group by city_id, line_id
        ),
        --站点订单数据
        station_order_data as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                city_id,
                line_id,
                start_station_id,
                count(1) as station_orders,                         --分站点订单数
                sum(if(status in (1,2), 1, 0)) as station_finished_orders,      ---分站点完单数
                count(distinct if(start_station_id>0 and status in (0,1,2), user_id, null)) as get_on_users --上车乘客数
            from obus_dw_ods.ods_sqoop_data_order_df 
            where dt='{pt}' and 
                from_unixtime(create_time, 'yyyy-MM-dd') = '{pt}' 
            group by city_id, line_id, start_station_id
        ),
        --新用户数量
        new_users_data as (
            select 
                 from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                 city_id,
                 line_id,
                 start_station_id,
                 sum(if(orders=1, 1, 0)) as new_users                       --新用户数量
            from (select 
                    city_id,
                    line_id,
                    start_station_id,
                    create_time,
                    user_id, 
                    row_number() over(partition by user_id order by arrive_time) orders
                from obus_dw_ods.ods_sqoop_data_order_df 
                where dt='{pt}' and 
                    status in (1,2) and 
                    user_id>0
                ) as t
            where from_unixtime(create_time,'yyyy-MM-dd')='{pt}' 
            group by city_id, line_id, start_station_id
        ),
        --下车乘客数
        get_off_users as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                city_id,
                line_id,
                end_station_id,
                count(distinct if(end_station_id>0 and status in (0,1,2), user_id, null)) as get_off_users --下车乘客数
            from obus_dw_ods.ods_sqoop_data_order_df 
            where dt='{pt}' and 
                from_unixtime(create_time, 'yyyy-MM-dd') = '{pt}' 
            group by city_id, line_id, end_station_id
        )

        --结果集
        select 
            station_data.dt,
            station_data.city_id,
            nvl(dc.name, ''),
            IF(station_data.line_id IS NULL, 0, station_data.line_id),
            nvl(line_data.name, ''),
            station_data.id,
            station_data.name,
            IF(driver_data.total_drivers IS NULL, 0, driver_data.total_drivers),
            IF(driver_data.serv_drivers IS NULL, 0, driver_data.serv_drivers),
            IF(driver_data.serv_on_the_road_drivers IS NULL, 0, driver_data.serv_on_the_road_drivers),
            IF(driver_data.serv_idle_drivers IS NULL, 0, driver_data.serv_idle_drivers),
            IF(driver_data.no_serv_drivers IS NULL, 0, driver_data.no_serv_drivers),
            IF(line_order_data.lines_orders IS NULL, 0, line_order_data.lines_orders),
            IF(station_order_data.station_orders IS NULL, 0, station_order_data.station_orders),
            IF(line_order_data.lines_finished_orders IS NULL, 0, line_order_data.lines_finished_orders),
            IF(station_order_data.station_finished_orders IS NULL, 0, station_order_data.station_finished_orders),
            IF(new_users_data.new_users IS NULL, 0, new_users_data.new_users),
            IF(station_order_data.get_on_users IS NULL, 0, station_order_data.get_on_users),
            IF(get_off_users.get_off_users IS NULL, 0, get_off_users.get_off_users),
            IF(line_order_data.line_gmv_single IS NULL, 0, line_order_data.line_gmv_single)
        from station_data 
        left join line_data on station_data.dt=line_data.dt and 
                                station_data.city_id=line_data.city_id and 
                                station_data.line_id=line_data.id 
        left join driver_data on station_data.dt = driver_data.dt and 
                                station_data.city_id = driver_data.city_id and 
                                station_data.line_id = driver_data.id and 
                                station_data.id = driver_data.start_station 
        left join line_order_data on station_data.dt = line_order_data.dt and 
                                station_data.city_id = line_order_data.city_id and 
                                station_data.line_id = line_order_data.line_id  
        left join station_order_data on station_data.dt = station_order_data.dt and 
                                station_data.city_id = station_order_data.city_id and 
                                station_data.line_id = station_order_data.line_id and 
                                station_data.id = station_order_data.start_station_id 
        left join new_users_data on station_data.dt = new_users_data.dt and 
                                station_data.city_id = new_users_data.city_id and 
                                station_data.line_id = new_users_data.line_id and 
                                station_data.id = new_users_data.start_station_id 
        left join get_off_users on station_data.dt = get_off_users.dt and 
                                station_data.city_id = get_off_users.city_id and 
                                station_data.line_id = get_off_users.line_id and 
                                station_data.id = get_off_users.end_station_id 
        left join (select id, name from obus_dw_ods.ods_sqoop_conf_city_df where dt='{pt}' and validate=1) as dc 
            on station_data.city_id = dc.id 

    '''.format(
        pt=ds
    )
    logging.info(sql)

    # Read everything from Hive first; release the cursor even if the query fails.
    hive_cursor = get_hive_cursor()
    try:
        hive_cursor.execute(sql)
        result = hive_cursor.fetchall()
    finally:
        hive_cursor.close()

    # Target column names, in the same order as the SELECT list above.
    columns = ['dt', 'city_id', 'city', 'line_id', 'line_name', 'station_id',
               'station_name', 'total_drivers', 'serv_drivers', 'serv_on_the_road_drivers',
               'serv_idle_drivers', 'no_serv_drivers', 'lines_orders', 'station_orders', 'lines_finished_orders',
               'station_finished_orders', 'new_users', 'get_on_users', 'get_off_users', 'line_gmv_single']
    # Assignment list handed to __data_to_mysql; presumably used as the
    # ON DUPLICATE KEY UPDATE clause -- confirm against that helper.
    update_clause = '''
                        total_drivers=values(total_drivers),
                        serv_drivers=values(serv_drivers),
                        serv_on_the_road_drivers=values(serv_on_the_road_drivers),
                        serv_idle_drivers=values(serv_idle_drivers),
                        no_serv_drivers=values(no_serv_drivers),
                        lines_orders=values(lines_orders),
                        station_orders=values(station_orders),
                        lines_finished_orders=values(lines_finished_orders),
                        station_finished_orders=values(station_finished_orders),
                        new_users=values(new_users),
                        get_on_users=values(get_on_users),
                        get_off_users=values(get_off_users),
                        line_gmv_single=values(line_gmv_single)
                    '''

    mysql_conn = get_db_conn('mysql_bi')
    mcursor = mysql_conn.cursor()
    try:
        __data_to_mysql(mcursor, result, columns, update_clause)
    finally:
        mcursor.close()
コード例 #26
0
def init_mysql_table(**op_kwargs):
    """Copy a Hive table (or one ``dt`` partition of it) into the same-named MySQL table.

    Keyword Args:
        db: Hive database name; also used as the MySQL schema name.
        table: Hive table name; also used as the MySQL table name.
        mysql_conn: connection id handed to ``get_db_conn`` and
            ``create_bi_mysql_table``.
        ds: partition date used in the ``dt = '...'`` filters.
        overwrite: when truthy the MySQL table is truncated first; otherwise
            only the rows for ``ds`` are deleted before inserting.

    When ``create_bi_mysql_table`` reports a newly created table the whole
    Hive table is copied, otherwise only the ``ds`` partition. Rows are
    written with ``REPLACE INTO`` in batches of 1000.

    Any error raised during the copy is logged and reported through
    ``ComwxApi.postAppMessage``; it is not re-raised.
    """
    hive_cursor = get_hive_cursor('hiveserver2_default')
    hive_db = op_kwargs.get('db')
    hive_table = op_kwargs.get('table')
    mysql_cursor = op_kwargs.get('mysql_conn')
    dt = op_kwargs.get('ds')
    overwrite = op_kwargs.get('overwrite')

    hive_columns = get_hive_table_columns(hive_cursor, hive_db, hive_table)
    cols = []
    mcols = []
    for v in hive_columns:
        col_name = v['name'].lower()
        # Replace NULLs with a type-appropriate default in the SELECT list.
        if "int" in v['type']:
            cols.append("if(`{}` is NULL, 0, `{}`)".format(col_name, col_name))
        elif v['type'] in ('float', 'double', 'decimal'):
            cols.append("if(`{}` is NULL, '0.00', `{}`)".format(col_name, col_name))
        elif v['type'] in ('array', 'map', 'struct'):
            # Complex types are not copied; an empty string is selected instead.
            cols.append("''")
        else:
            cols.append("if(`{}` is NULL, '', `{}`)".format(col_name, col_name))

        mcols.append(col_name)
    new_table = create_bi_mysql_table(mysql_cursor, hive_db, hive_table,
                                      hive_columns)
    if new_table:  # new table: full load
        hql = '''
            SELECT 
                {cols} 
            FROM {db}.{table} 
        '''.format(db=hive_db, table=hive_table, cols=",".join(cols))
    else:  # existing table: incremental load of one partition
        hql = '''
            SELECT 
                {cols}
            FROM {db}.{table} 
            WHERE dt = '{dt}'
        '''.format(db=hive_db, table=hive_table, cols=",".join(cols), dt=dt)
    logging.info(hql)
    # NOTE(review): API credentials are hard-coded in source; they should be
    # moved to a secret store / connection config.
    wxapi = ComwxApi('wwd26d45f97ea74ad2',
                     'BLE_v25zCmnZaFUgum93j3zVBDK-DjtRkLisI_Wns4g', '1000011')
    # Pre-bind so the cleanup below is safe when get_db_conn()/cursor() fails.
    mcursor = None
    try:
        mconn = get_db_conn(mysql_cursor)
        mcursor = mconn.cursor()
        if overwrite:
            mcursor.execute("TRUNCATE TABLE {db}.{table}".format(
                db=hive_db, table=hive_table))
        else:
            mcursor.execute(
                "DELETE FROM {db}.{table} WHERE dt = '{dt}'".format(
                    db=hive_db, table=hive_table, dt=dt))
        isql = 'replace into {db}.{table} (`{cols}`) values '.format(
            db=hive_db, table=hive_table, cols='`,`'.join(mcols))

        hive_cursor.execute(hql)
        rows = []
        while True:
            try:
                record = hive_cursor.next()
            except StopIteration:
                # Normal end of the result set. Any other error propagates to
                # the handler below so a failed fetch is reported instead of
                # being mistaken for "no more rows".
                break
            if not record:
                break
            rows.append("('{}')".format("','".join([
                str(MySQLdb.escape_string(str(x)), encoding="utf-8")
                for x in record
            ])))
            if len(rows) >= 1000:
                logging.info(len(rows))
                mcursor.execute("{h} {v}".format(h=isql, v=",".join(rows)))
                rows = []

        if rows:
            logging.info("last: {}".format(len(rows)))
            mcursor.execute("{h} {v}".format(h=isql, v=",".join(rows)))
    except BaseException as e:
        # Deliberately broad (kept from the original): every failure is
        # logged and alerted, never re-raised.
        logging.info(e)
        wxapi.postAppMessage(
            '重要重要重要:{}.{}数据写入mysql异常【{}】'.format(hive_db, hive_table, dt),
            '271')
    finally:
        if mcursor is not None:
            mcursor.close()
        hive_cursor.close()
コード例 #27
0
ファイル: query_data.py プロジェクト: lishuailishuai/shanchu2
def query_data(**op_kwargs):
    """Collect one day's summary metrics from Hive and insert them as a single row.

    Keyword Args:
        ds: date string substituted into every query as ``{dt}``.

    Steps:
        1. Run ``repair_table_query`` for a fixed list of tables.
        2. Run ``query1``, ``query2``, ``query4``, ``query5``, ``query6``,
           ``query7`` and ``query9``; the first row of each result is passed
           value-by-value through ``mapper``.
        3. Fetch driver figures from ``get_driver_data``.
        4. Derive the ratio/average columns and execute ``INSERT_SQL`` with
           ``[None, dt] + data``.

    Every ratio is 0 when its denominator is not positive.
    """
    dt = op_kwargs.get('ds')

    def ratio(numerator, denominator):
        # Guarded true division shared by all derived columns.
        return numerator / float(denominator) if denominator > 0 else 0

    cursor = get_hive_cursor()
    try:
        cursor.execute("set hive.execution.engine=tez")
        repair_table_names = [
            "data_driver_extend", "data_driver_reward", "data_order",
            "data_order_payment", "data_user_extend", "user_action", "client_event"
        ]
        for name in repair_table_names:
            print(name)
            # Tables whose name starts with "data" are in oride_db, the rest
            # in oride_source.
            db_name = "oride_source."
            if name.startswith("data"):
                db_name = "oride_db."
            cursor.execute(repair_table_query % (db_name + name))

        def first_row(query):
            # Execute one query and return its first row, mapped value by value.
            cursor.execute(query.format(dt=dt))
            return [mapper(value) for value in cursor.fetchall()[0]]

        [
            call_num, success_num, gmv, cancel_before_dispatching_num,
            cancel_after_dispatching_by_user_num,
            cancel_after_dispatching_by_driver_num, pickup_num, pickup_total_time,
            take_num, take_total_time, total_driver_price
        ] = first_row(query1)
        print(1)
        [pay_num, total_price, total_c_discount, offline_num] = first_row(query2)
        print(2)
        [call_user_num, finished_user_num, new_finished_user_num] = first_row(query4)
        print(4)
        [total_driver_num, login_driver_num, new_driver_num] = first_row(query5)
        print(5)
        [order_driver_num, finished_driver_num, new_finished_driver_num] = first_row(query6)
        print(6)
        [bubble_num] = first_row(query7)
        print(7)
        [new_passenger_num] = first_row(query9)
        print(9)
        (transport_efficiency, avg_order_per_driver,
         online_driver_num) = get_driver_data(dt)
        print(10)
    finally:
        cursor.close()

    # Order matters: values are positional parameters of INSERT_SQL.
    data = [
        success_num,
        ratio(success_num, call_num),
        bubble_num,
        call_num,
        ratio(call_num, bubble_num),
        online_driver_num,
        order_driver_num,
        round(float(gmv), 2),
        round(ratio(float(gmv), success_num), 2),
        round(float(total_driver_price), 2),
        round(float(total_c_discount), 2),
        round(ratio(float(total_driver_price), success_num), 2),
        round(ratio(float(total_c_discount), success_num), 2),
        ratio(float(total_driver_price + total_c_discount), total_price),
        ratio(cancel_before_dispatching_num, call_num),
        ratio(cancel_after_dispatching_by_user_num, call_num),
        ratio(cancel_after_dispatching_by_driver_num, call_num),
        # pickup_num * 60 is positive exactly when pickup_num is.
        round(ratio(pickup_total_time, pickup_num * 60), 2),
        round(ratio(take_total_time, take_num), 2),
        total_driver_num,
        new_driver_num,
        finished_driver_num,
        new_finished_driver_num,
        ratio(new_finished_driver_num, finished_driver_num),
        call_user_num,
        finished_user_num,
        new_passenger_num,
        new_finished_user_num,
        # Fixed: the guard previously tested finished_driver_num while
        # dividing by finished_user_num.
        ratio(new_finished_user_num, finished_user_num),
        # Fixed: float division, consistent with the other ratios.
        ratio(new_finished_user_num, new_passenger_num),
        pay_num - offline_num,
        offline_num,
        transport_efficiency,
        0,
        avg_order_per_driver
    ]
    insert_data = [None, dt] + data
    sql_conn = get_db_conn()
    sql_cursor = sql_conn.cursor()
    try:
        # NOTE(review): no commit is issued here (as in the original);
        # presumably get_db_conn() returns an autocommit connection -- confirm.
        sql_cursor.execute(INSERT_SQL, insert_data)
    finally:
        sql_cursor.close()
コード例 #28
0
def create_bi_mysql_table(conn, db, table, columns):
    """Create MySQL table ``db.table`` from ``columns``, or align an existing one.

    Args:
        conn: connection id passed to ``get_db_conn``.
        db: MySQL schema name; created if missing when the table is created.
        table: MySQL table name.
        columns: sequence of dicts with ``name``, ``type`` and ``comment``
            keys. ``type`` is mapped through ``type_map``; unknown types fall
            back to ``varchar(255) not null default ''``.

    Returns:
        True if the table did not exist and was created, False if it already
        existed (after adding missing columns and changing columns whose
        MySQL data type differs from the mapped type).
    """
    default_type = {"type": "varchar", "ext": "(255) not null default ''"}
    lookup_sql = '''
        SELECT 
            COLUMN_NAME, 
            DATA_TYPE  
        FROM information_schema.COLUMNS 
        WHERE TABLE_SCHEMA='{db}' AND 
            TABLE_NAME='{table}' 
        ORDER BY ORDINAL_POSITION
    '''.format(db=db, table=table)

    mconn = get_db_conn(conn)
    mcursor = mconn.cursor()
    try:
        mcursor.execute(lookup_sql)
        res = mcursor.fetchall()

        # MySQL table does not exist: create schema and table.
        if len(res) <= 0:
            cols = []
            for v in columns:
                types = type_map.get(v['type'].lower().strip(), default_type)
                cols.append("`{name}` {type}{ext} comment '{comment}'".format(
                    name=v['name'],
                    type=types['type'],
                    ext=types['ext'],
                    comment=v['comment']))
            mcursor.execute("CREATE DATABASE IF NOT EXISTS {}".format(db))
            sql = '''
            CREATE TABLE IF NOT EXISTS {db}.{table} (
                {columns}
            )engine=InnoDB default charset=utf8mb4
        '''.format(db=db, table=table, columns=",\n".join(cols))
            logging.info(sql)
            mcursor.execute(sql)
            return True

        # MySQL table exists: index its columns by normalised name.
        mysql_columns = {}
        for (col_name, d_type) in res:
            mysql_columns[col_name.lower().strip()] = d_type.lower().strip()

        sql = 'ALTER TABLE {db}.{table} '.format(db=db, table=table)
        for k, v in enumerate(columns):
            types = type_map.get(v['type'].lower().strip(), default_type)

            # Normalise like the keys above so differently-cased names are
            # not mistaken for missing columns.
            mysql_coltype = mysql_columns.get(v['name'].lower().strip(), None)
            if not mysql_coltype:
                if k == 0:
                    alter_sql = "add `{name}` {type} comment '{comment}' first".format(
                        name=v['name'],
                        type=types['type'] + types['ext'],
                        comment=v['comment'])
                else:
                    # Keep the column order of ``columns``: place after the
                    # preceding column.
                    alter_sql = "add `{name}` {type} comment '{comment}' after {prev}".format(
                        name=v['name'],
                        type=types['type'] + types['ext'],
                        comment=v['comment'],
                        prev=columns[k - 1]['name'].lower())
                logging.info(sql + alter_sql)
                mcursor.execute(sql + alter_sql)
            elif types['type'] != mysql_coltype:
                # Fixed: target the column being compared (v['name']); the
                # original reused a stale loop variable and always altered
                # the last MySQL column.
                alter_sql = "change `{name}` `{name}` {type} comment '{comment}'".format(
                    name=v['name'],
                    type=types['type'] + types['ext'],
                    comment=v['comment'])
                logging.info(sql + alter_sql)
                mcursor.execute(sql + alter_sql)

        return False
    finally:
        # Release the cursor on every path, including exceptions.
        mcursor.close()
コード例 #29
0
def get_data_from_impala(**op_kwargs):
    """Run the per-driver daily report query and pass the rows to ``__data_to_mysql``.

    The query starts from the driver table and left-joins work-duration,
    trip-cycle, driving-time, income and city data for a single partition
    date, then numbers the drivers within each city by ``driver_amount``
    descending (column ``num``).

    Keyword Args:
        ds: partition date as ``YYYY-MM-DD``. When absent, the local date
            86400 seconds before "now" is used.

    Both cursors are closed even when the query or the MySQL write raises.
    """
    ds = op_kwargs.get(
        'ds', time.strftime('%Y-%m-%d', time.localtime(time.time() - 86400)))
    sql = '''
        WITH
        --线路数据
        driver_data as 
        (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                city_id,
                id,
                real_name                                    --司机名字
            from obus_dw_ods.ods_sqoop_data_driver_df 
            where dt='{pt}' 
        ),
        --工作数据
        work_data as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                dr.city_id,
                dr.id as driver_id,
                sum(if(dw.serv_mode=1 and dw.serv_mode1=0, round(abs(dw.create_time2-dw.create_time)/3600,2), 0)) as work_dur             --司机今日在线时长(小时)
            from 
                (select 
                    driver_id,
                    serv_mode,
                    create_time,
                    lead(serv_mode,1,0) over(partition by driver_id order by create_time) serv_mode1,
                    lead(create_time,1,unix_timestamp('{pt} 23:59:59','yyyy-MM-dd HH:mm:ss')) over(partition by driver_id order by create_time) create_time2
                from obus_dw_ods.ods_sqoop_data_driver_work_log_df 
                where dt='{pt}' and 
                    from_unixtime(create_time, 'yyyy-MM-dd')='{pt}'
                ) as dw 
            join (select 
                    id, 
                    city_id 
                from obus_dw_ods.ods_sqoop_data_driver_df 
                where dt='{pt}'
                ) as dr 
            on dw.driver_id = dr.id 
            group by dr.city_id, dr.id
        ),
        --司机圈数
        driver_cycle_data as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                city_id,
                driver_id,
                count(distinct id)/2 as cycle_cnt                             --司机圈数
            from obus_dw_ods.ods_sqoop_data_driver_trip_df 
            where dt = '{pt}' and 
                from_unixtime(end_time, 'yyyy-MM-dd') = '{pt}' and 
                status = 1 
            group by city_id, driver_id 
        ),
        --司机驾驶时长
        driver_time as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                city_id,
                driver_id,
                round(sum(end_time - start_time)/3600, 2) as driver_time            --司机驾驶时长
            from obus_dw_ods.ods_sqoop_data_driver_trip_df 
            where dt = '{pt}' and 
                from_unixtime(end_time, 'yyyy-MM-dd') = '{pt}' and 
                status = 1
            group by city_id, driver_id
        ),
        --收入数据
        income_data as (
            select 
                from_unixtime(unix_timestamp('{pt}','yyyy-MM-dd'), 'yyyyMMdd') as dt,
                dd.city_id,
                dd.id,
                sum(ddrd.amount_true) as driver_amount,                             ---司机收入
                sum(ddrd.amount_pay_obus) as obus_pay_driver_amount,                ---Obus支付司机收入
                sum(ddrd.amount_pay_ticket) as tickets_pay_driver_amount            --公交卡支付司机收入
            from (select 
                    driver_id,
                    amount_true,
                    amount_pay_obus,
                    amount_pay_ticket
                from obus_dw_ods.ods_sqoop_data_driver_records_day_df 
                where dt='{pt}' and 
                    `day`=unix_timestamp('{pt}','yyyy-MM-dd') 
                ) as ddrd 
            join (select 
                    id,
                    city_id
                from obus_dw_ods.ods_sqoop_data_driver_df 
                where dt='{pt}'
                ) as dd 
            on ddrd.driver_id = dd.id 
            group by dd.city_id, dd.id
        )

        --结果集
        select 
            *,
            row_number() over(partition by city_id order by driver_amount desc) num
        from 
            (select 
                driver_data.dt,
                driver_data.city_id,
                nvl(dc.name, ''),
                driver_data.id,
                driver_data.real_name,
                IF(work_data.work_dur IS NULL, 0, work_data.work_dur),
                IF(driver_cycle_data.cycle_cnt IS NULL, 0, driver_cycle_data.cycle_cnt),
                round(if(driver_cycle_data.cycle_cnt>0, driver_time.driver_time/driver_cycle_data.cycle_cnt, 0), 2),
                IF(income_data.driver_amount IS NULL, 0, income_data.driver_amount) as driver_amount,
                IF(income_data.obus_pay_driver_amount IS NULL, 0, income_data.obus_pay_driver_amount),
                IF(income_data.tickets_pay_driver_amount IS NULL, 0, income_data.tickets_pay_driver_amount)
            from driver_data 
            left join work_data on driver_data.dt=work_data.dt and 
                                    driver_data.city_id=work_data.city_id and 
                                    driver_data.id=work_data.driver_id 
            left join driver_cycle_data on driver_data.dt = driver_cycle_data.dt and 
                                    driver_data.city_id = driver_cycle_data.city_id and 
                                    driver_data.id = driver_cycle_data.driver_id 
            left join driver_time on driver_data.dt = driver_time.dt and 
                                    driver_data.city_id = driver_time.city_id and 
                                    driver_data.id = driver_time.driver_id  
            left join income_data on driver_data.dt = income_data.dt and 
                                    driver_data.city_id = income_data.city_id and 
                                    driver_data.id = income_data.id
            left join (select id, name from obus_dw_ods.ods_sqoop_conf_city_df where dt='{pt}' and validate=1) as dc 
                on driver_data.city_id = dc.id 
            ) as t
    '''.format(pt=ds)
    logging.info(sql)

    # Read everything from Hive first; release the cursor even if the query fails.
    hive_cursor = get_hive_cursor()
    try:
        hive_cursor.execute(sql)
        result = hive_cursor.fetchall()
    finally:
        hive_cursor.close()

    # Target column names, in the same order as the SELECT list above.
    columns = [
        'dt', 'city_id', 'city', 'driver_id', 'driver_name', 'serv_time',
        'cycle_cnt', 'avg_time', 'driver_amount', 'obus_pay_driver_amount',
        'tickets_pay_driver_amount', 'num'
    ]
    # Assignment list handed to __data_to_mysql; presumably used as the
    # ON DUPLICATE KEY UPDATE clause -- confirm against that helper.
    update_clause = '''
                        serv_time=values(serv_time),
                        cycle_cnt=values(cycle_cnt),
                        avg_time=values(avg_time),
                        driver_amount=values(driver_amount),
                        obus_pay_driver_amount=values(obus_pay_driver_amount),
                        tickets_pay_driver_amount=values(tickets_pay_driver_amount),
                        num=values(num)
                    '''

    mysql_conn = get_db_conn('mysql_bi')
    mcursor = mysql_conn.cursor()
    try:
        __data_to_mysql(mcursor, result, columns, update_clause)
    finally:
        mcursor.close()
コード例 #30
0
def first_driver_data(**op_kwargs):
    """Count, per promoter code, the drivers with a first order on ``ds`` and upsert the counts.

    Keyword Args:
        ds: partition date substituted into the Hive query as ``{ds}``.

    The Hive query joins rider sign-ups (``know_orider = 4``) to promoter
    users by name, then to the orders of partition ``ds`` with status 4 or 5,
    keeping each driver's earliest order by ``arrive_time``. The
    ``(code, day, count, unix_timestamp())`` rows are written to
    ``promoter_data_day`` as ``(code, day, dft, create_time)`` with
    ``ON DUPLICATE KEY UPDATE``, in batches of 1000.

    Values are sent as bound parameters, so a quote character inside a
    promoter code can no longer break the statement.
    """
    cursor = get_hive_cursor()
    dt = op_kwargs.get('ds')
    hql = """
        SELECT 
            uc.code,
            from_unixtime(unix_timestamp(ro.dt,'yyyy-MM-dd'), 'yyyyMMdd') AS day,
            COUNT(distinct ro.driver_id) as u, 
            unix_timestamp() 
        FROM (SELECT  
                r.driver_id,
                p.code 
            FROM (SELECT 
                    driver_id,
                    know_orider_extend  
                FROM oride_dw_ods.ods_sqoop_mass_rider_signups_df 
                WHERE dt = '{ds}' and 
                    know_orider = 4
                ) AS r 
            JOIN (select 
                    code, 
                    name 
                FROM oride_dw_ods.ods_sqoop_promoter_promoter_user_df 
                WHERE dt='{ds}' 
                ) AS p
            ON r.know_orider_extend = p.name 
            ) AS uc 
        JOIN (SELECT 
                dt,
                driver_id,
                arrive_time,
                row_number() over(partition by driver_id order by arrive_time) orders
            FROM oride_dw.dwd_oride_order_base_include_test_di 
            WHERE status IN (4,5) AND 
                dt = '{ds}' 
            ) as ro 
        ON uc.driver_id = ro.driver_id 
        WHERE ro.orders = 1 AND 
            from_unixtime(ro.arrive_time,'yyyy-MM-dd')='{ds}' 
        GROUP BY uc.code, ro.dt
    """.format(ds=dt)
    try:
        cursor.execute("SET mapreduce.job.queuename=root.airflow")
        cursor.execute("SET hive.exec.parallel=true")
        logging.info(hql)
        cursor.execute(hql)
        res = cursor.fetchall()
    finally:
        # Release the Hive cursor even when the query fails.
        cursor.close()

    # One 4-tuple per row: (code, day, dft, create_time).
    rows = [tuple(record) for record in res]
    sql = ('insert into promoter_data_day (code, day, dft, create_time) '
           'values (%s, %s, %s, %s) '
           'on duplicate key update dft=values(dft), create_time=values(create_time)')
    batch_size = 1000

    mconn = get_db_conn('opay_spread_mysql')
    mcursor = mconn.cursor()
    try:
        for start in range(0, len(rows), batch_size):
            mcursor.executemany(sql, rows[start:start + batch_size])
    finally:
        mcursor.close()