Exemple #1
0
 def __init__(self):
     """Prepare the in/out-flow analysis module.

     Loads the text template and creates the empty containers that the
     other methods of the class read and fill.
     """
     from common.TempletLoader import TempletLoader

     template_path = 'templets/module_inout_analize.txt'
     self.__templete = TempletLoader(template_path)
     # Stations to analyse, keyed by city name.
     self.__city_dict = {'广州': ['机场南', '广州东站']}
     # Template parameters and chart data start out empty.
     self.__params = dict()
     self.__data = dict()
     self.name = "module_inout_analize"
 def __init__(self):
     """Prepare the jam (congestion) analysis module.

     Loads the text template and creates the empty containers that the
     other methods of the class read and fill.
     """
     from common.TempletLoader import TempletLoader

     template_path = 'templets/module_jamanalize.txt'
     self.__templete = TempletLoader(template_path)
     # Template parameters and chart data start out empty.
     self.__params = dict()
     self.__data = dict()
     self.name = "module_jamanalize"
     # Must contain exactly two (start_hour, end_hour) pairs: the morning
     # and the evening commuting periods.
     morning, evening = (7, 9), (17, 19)
     self.__time_period = [morning, evening]
Exemple #3
0
class Module(object):
    """Report module computing ticket-order success rates.

    ``run`` fills the template parameters and chart data, ``maketext``
    renders the text template and ``makedata`` serialises the chart data.
    """

    def __init__(self):
        from common.TempletLoader import TempletLoader
        self.__templete = TempletLoader('templets/module_ticketrate.txt')
        self.__params = {}
        # Initialised here so makedata() can be called before run().
        self.__data = {}
        self.name = "module_ticketrate"

    def run(self, df, global_params=None):
        """Compute the overall and per-station order success rates.

        Args:
            df: DataFrame with ``order_status``, ``ticket_num`` and
                ``entry_station`` columns. An ``is_success`` column is
                added to it in place.
            global_params: optional dict shared between modules; receives
                ``M5_total_rate`` and ``M5_tail_stations``. A fresh dict
                is used when omitted.
        """
        if global_params is None:
            global_params = {}

        # order_status == 5 marks a successful transaction.
        df['is_success'] = (df['order_status'] == 5).astype(int)

        status = df.groupby('is_success').ticket_num.count()
        total_orders = status.sum()
        # .get() avoids a KeyError when no order succeeded at all.
        success_orders = status.get(1, 0)
        total_rate = success_orders / total_orders if total_orders else 0.0

        by_station = df.groupby('entry_station')
        st_status = by_station['is_success'].mean().reset_index(name='rate')
        # Both aggregations come from the same groupby, so rows line up.
        st_status['ticket_num'] = by_station['ticket_num'].count().values
        st_status['success_ticket'] = (
            st_status['ticket_num'] * st_status['rate']
        ).apply(lambda x: int(round(x)))
        st_status['fail_ticket'] = (
            st_status['ticket_num'] - st_status['success_ticket'])
        # Lowest success rate first; stable sort keeps ties deterministic.
        st_status = st_status.sort_values('rate', ascending=True,
                                          kind='mergesort')
        worst = st_status.head(5)

        params = {
            'M5_total_rate': total_rate,
            'M5_tail_stations': worst['entry_station'].tolist(),
            'M5_success_tk': worst['success_ticket'].tolist(),
            'M5_fail_tk': worst['fail_ticket'].tolist(),
            'M5_rate': worst['rate'].tolist(),
        }
        self.__params.update(params)
        self.__data = params

        global_params['M5_total_rate'] = '%.2f' % (total_rate * 100)
        global_params['M5_tail_stations'] = '、'.join(
            params['M5_tail_stations'][:3])

    def maketext(self, global_params=None):
        """Render the template text.

        Values stored by this module take priority over ``global_params``;
        template parameters that have no value are filled with ''.
        """
        if isinstance(global_params, dict):
            for key, value in global_params.items():
                self.__params.setdefault(key, value)
        for key in self.__templete.get_params():
            self.__params.setdefault(key, '')
        return self.__templete.format_templet(self.__params)

    def makedata(self):
        """Return the chart data produced by ``run`` as a JSON string."""
        import json
        from common.MyEncoder import MyEncoder
        return json.dumps(dict(self.__data), ensure_ascii=False, cls=MyEncoder)
Exemple #4
0
class Module(object):
    """Report module on users whose first successful order fell on a weekend.

    ``run`` fills the template parameters and chart data, ``maketext``
    renders the text template and ``makedata`` serialises the chart data.
    """

    def __init__(self):
        from common.TempletLoader import TempletLoader
        self.__templete = TempletLoader('templets/module_userstay.txt')
        self.__params = {}
        # Initialised here so makedata() can be called before run().
        self.__data = {}
        self.name = "module_userstay"

    def run(self, df, global_params=None):
        """Compute ``US_date`` / ``US_num`` from weekend first-time users.

        Args:
            df: DataFrame with ``order_status``, ``owner_id`` and
                ``reg_date`` columns.
            global_params: accepted for interface parity with the other
                modules; not used here.
        """
        # order_status == 5 marks a successful transaction.
        df_suc = df[df['order_status'] == 5].copy()

        # Earliest reg_date per user, attached to every row of that user.
        first_use = df_suc.groupby(['owner_id'])['reg_date'].min().reset_index()
        first_use = first_use.rename(columns={'reg_date': 'first_time'})
        df_suc = df_suc.merge(first_use, on=['owner_id'], how='left')

        df_suc['time'] = pd.to_datetime(df_suc['first_time'],
                                        format='%Y-%m-%d %H:%M:%S')
        df_suc['reg_date'] = df_suc['reg_date'].astype(str)
        df_suc['day'] = df_suc['time'].dt.dayofweek
        # pandas numbers weekdays Monday=0 ... Sunday=6, so the weekend is
        # 5 and 6 (the previous "0 or 6" test selected Monday and Sunday).
        df_suc['is_weekend'] = df_suc['day'].isin([5, 6]).astype(int)

        us_date, us_num = day_actitve_num_print(
            df_suc[df_suc['is_weekend'] == 1])

        params = {'US_date': us_date, 'US_num': us_num}
        self.__params.update(params)
        self.__data = params

    def maketext(self, global_params=None):
        """Render the template text.

        Values stored by this module take priority over ``global_params``;
        template parameters that have no value are filled with ''.
        """
        if isinstance(global_params, dict):
            for key, value in global_params.items():
                self.__params.setdefault(key, value)
        for key in self.__templete.get_params():
            self.__params.setdefault(key, '')
        return self.__templete.format_templet(self.__params)

    def makedata(self):
        """Return the chart data produced by ``run`` as a JSON string."""
        import json
        from common.MyEncoder import MyEncoder
        return json.dumps(dict(self.__data), ensure_ascii=False, cls=MyEncoder)
Exemple #5
0
class Module(object):
    """Placeholder report module backed by ``templets/module0.txt``.

    Every template parameter is rendered as an empty string; the module
    performs no computation and produces no chart data.
    """

    def __init__(self):
        from common.TempletLoader import TempletLoader
        loader = TempletLoader('templets/module0.txt')
        self.__templete = loader
        # Every parameter the template declares starts out blank.
        self.__params = dict.fromkeys(loader.get_params(), '')

    def run(self, df):
        """Do nothing; this module has no analysis step."""
        return None

    def maketext(self):
        """Render the template with the (blank) parameters."""
        rendered = self.__templete.format_templet(self.__params)
        return rendered

    def makedata(self):
        """Return an empty string: there is no chart data to export."""
        return ''
class Module(object):
    """Report module bucketing users by their number of successful orders.

    ``run`` fills the chart data, ``maketext`` renders the text template
    and ``makedata`` serialises the chart data.
    """

    def __init__(self):
        from common.TempletLoader import TempletLoader
        self.__templete = TempletLoader('templets/module0.txt')
        self.__params = {}
        self.__data = {}
        self.name = "module_usertimes"

    def run(self, df, global_params=None):
        """Count users per usage level and compute each level's share.

        Fills ``levels`` (level labels present in the data, in ascending
        bucket order), ``user_times`` (users per level) and
        ``user_percent`` (fraction of users per level).

        Args:
            df: DataFrame with ``order_status``, ``owner_id`` and
                ``order_no`` columns.
            global_params: accepted for interface parity with the other
                modules; not used here.
        """
        # Ascending bucket order. groupby would sort the labels as text,
        # which puts '20次以上' before '6-20次'.
        level_order = ['1次', '2-5次', '6-20次', '20次以上']

        def level_of(times):
            if times == 1:
                return level_order[0]
            if times <= 5:
                return level_order[1]
            if times <= 20:
                return level_order[2]
            return level_order[3]

        # order_status == 5 marks a successful transaction.
        paid = df[df.order_status == 5]
        orders_per_user = paid.groupby('owner_id').order_no.count()
        users_per_level = orders_per_user.map(
            level_of).value_counts().to_dict()
        user_counts = {
            level: users_per_level[level]
            for level in level_order if level in users_per_level
        }

        # Fill the chart data.
        total_users = sum(user_counts.values())
        self.__data['levels'] = list(user_counts)
        self.__data['user_times'] = user_counts
        self.__data['user_percent'] = {
            level: count / total_users
            for level, count in user_counts.items()
        }

    def maketext(self, global_params=None):
        """Render the template text.

        Values stored by this module take priority over ``global_params``;
        template parameters that have no value are filled with ''.
        """
        if isinstance(global_params, dict):
            for key, value in global_params.items():
                self.__params.setdefault(key, value)
        for key in self.__templete.get_params():
            self.__params.setdefault(key, '')
        return self.__templete.format_templet(self.__params)

    def makedata(self):
        """Return the chart data produced by ``run`` as a JSON string."""
        import json
        from common.MyEncoder import MyEncoder
        return json.dumps(dict(self.__data), ensure_ascii=False, cls=MyEncoder)
class Module(object):
    """Report module counting new users per day.

    A user counts as "new" on the day of their earliest ``reg_date``.
    """

    def __init__(self):
        from common.TempletLoader import TempletLoader
        self.__templete = TempletLoader('templets/module0.txt')
        self.__params = dict()
        self.__data = dict()
        self.name = "module_newuseranalize"

    def run(self, df, global_params=None):
        """Store ``new_user_day_count`` as ``[days, counts]``, sorted by day."""
        if global_params is None:
            global_params = {}

        def day_of(moment):
            # The first ten characters of the text form are the date part.
            return str(moment)[:10]

        # Earliest reg_date of every user marks when the user was added.
        first_seen = df.groupby('owner_id').reg_date.min()
        # Number of new users per day, ordered by day.
        per_day = first_seen.map(day_of).value_counts().sort_index()
        days = per_day.index.tolist()
        counts = per_day.tolist()
        # Fill the chart data.
        self.__data['new_user_day_count'] = [days, counts]

    def maketext(self, global_params=None):
        """Render the template; module-local values beat global ones."""
        # Global values are accepted, but local values take priority.
        if global_params and type(global_params) is dict:
            for key, value in global_params.items():
                self.__params.setdefault(key, value)
        # Any parameter still missing is rendered as an empty string.
        for key in self.__templete.get_params():
            self.__params.setdefault(key, '')
        # Return the formatted result.
        return self.__templete.format_templet(self.__params)

    def makedata(self):
        """Return the chart data produced by ``run`` as a JSON string."""
        import json
        from common.MyEncoder import MyEncoder
        payload = dict(self.__data)
        return json.dumps(payload, ensure_ascii=False, cls=MyEncoder)
Exemple #8
0
class Module(object):
    """Report module ranking start and end stations by tickets sold."""

    def __init__(self):
        from common.TempletLoader import TempletLoader
        self.__templete = TempletLoader('templets/module1.txt')
        self.__params = dict()

    def run(self, df):
        """Fill the top-10 start/end stations and their ticket share (%)."""
        # ORDER_STATUS == 5 marks a successful transaction.
        paid = df[df['ORDER_STATUS'] == 5]
        pair_totals = paid.groupby(
            ['START_NAME', 'END_NAME'])['SINGLE_TICKET_NUM'].sum()
        pair_totals = pair_totals.reset_index()

        def ranked(column):
            # Tickets per station of the given column, busiest first.
            totals = pair_totals.groupby(column)['SINGLE_TICKET_NUM'].sum()
            return totals.sort_values(ascending=False)

        by_start = ranked('START_NAME')
        by_end = ranked('END_NAME')
        top_start = by_start[:10]
        top_end = by_end[:10]

        self.__params['start_top10'] = top_start.index.tolist()
        self.__params['end_top10'] = top_end.index.tolist()
        self.__params['start_top10_percent'] = (
            top_start.sum() / by_start.sum() * 100)
        self.__params['end_top10_precent'] = top_end.sum() / by_end.sum() * 100

    def maketext(self, global_params=None):
        """Render the template; module-local values beat global ones."""
        # Global values are accepted, but local values take priority.
        if global_params and type(global_params) is dict:
            for key, value in global_params.items():
                self.__params.setdefault(key, value)
        # Any parameter still missing is rendered as an empty string.
        for key in self.__templete.get_params():
            self.__params.setdefault(key, '')
        # Return the formatted result.
        return self.__templete.format_templet(self.__params)

    def makedata(self):
        """Return an empty string: this module exports no chart data."""
        return ''
class Module(object):
    """Report module identifying hot stations and hot routes.

    ``run`` fills the template parameters and chart data, ``maketext``
    renders the text template and ``makedata`` serialises the chart data.
    """

    def __init__(self):
        from common.TempletLoader import TempletLoader
        self.__templete = TempletLoader('templets/module_hotstation.txt')
        self.__params = {}
        # Initialised here so makedata() can be called before run().
        self.__data = {}
        self.name = "module_hotstation"

    def run(self, df, global_params=None):
        """Compute hot stations, their daily trends and the hot routes.

        A station is "hot" when its total tickets (as entry plus as exit
        station) exceed the mean over all stations. Trends are exported
        for up to three hot stations; missing ranks get empty lists.

        Args:
            df: DataFrame with ``order_status``, ``entry_date``,
                ``entry_station``, ``exit_station`` and ``ticket_num``.
            global_params: optional dict shared between modules; receives
                the ``M2_*`` text values. A fresh dict is used if omitted.
        """
        from collections import Counter
        from itertools import chain

        if global_params is None:
            global_params = {}

        # order_status == 5 marks a successful transaction.
        df_suc = df[df['order_status'] == 5].copy()
        df_suc['date'] = df_suc['entry_date'].astype(str).str[:10]

        # Tickets per (entry, exit) pair; reused for stations and routes.
        route_tickets = df_suc.groupby(
            ['entry_station', 'exit_station']).ticket_num.sum()
        entries = route_tickets.groupby(level='entry_station').sum()
        exits = route_tickets.groupby(level='exit_station').sum()

        # A station seen only as entry (or only as exit) contributes 0 for
        # the missing side instead of turning its total into NaN.
        all_stations = entries.index.union(exits.index)
        total = (entries.reindex(all_stations, fill_value=0) +
                 exits.reindex(all_stations, fill_value=0))
        total = total.sort_values(ascending=False, kind='mergesort')
        hot = total[total > total.mean()]
        hot_stations = hot.index.tolist()

        # Daily tickets per entry station.
        trend = df_suc.groupby(['entry_station',
                                'date']).ticket_num.sum().reset_index()

        top_routes = route_tickets.sort_values(ascending=False).head(10)
        hot_routes = top_routes.index.tolist()
        # Stations that appear most often among the hot routes.
        station_hits = Counter(chain.from_iterable(hot_routes))
        route_top_stations = [
            name for name, _ in station_hits.most_common(5)
        ]

        self.__params['M2_hotstations'] = hot_stations
        self.__params['M2_hotroutes'] = hot_routes
        self.__params['M2_hotroutes_topstations'] = route_top_stations

        params = {
            'M2_hotstations': hot_stations,
            'M2_hotstations_ticketnum': hot.tolist(),
        }
        for rank in range(3):
            if rank < len(hot_stations):
                rows = trend[trend.entry_station == hot_stations[rank]]
                dates = rows['date'].tolist()
                tickets = rows['ticket_num'].tolist()
            else:
                # Fewer than three hot stations: keep the keys, no data.
                dates, tickets = [], []
            params['M2_hotstations_trend%d_time' % (rank + 1)] = dates
            params['M2_hotstations_trend%d' % (rank + 1)] = tickets
        params['M2_hotroutes'] = hot_routes
        params['M2_hotroutes_ticketnum'] = top_routes.tolist()

        self.__data = params
        global_params['M2_hotstations'] = '、'.join(hot_stations)
        for rank, name in enumerate(hot_stations[:3], start=1):
            global_params['M2_top%d' % rank] = name
        global_params['M2_hotroutes'] = '、'.join(
            '-'.join(route) for route in hot_routes[:2])
        global_params['M2_hotroutes_topstations'] = '、'.join(
            route_top_stations[:3])

    def maketext(self, global_params=None):
        """Render the template text.

        Values stored by this module take priority over ``global_params``;
        template parameters that have no value are filled with ''.
        """
        if isinstance(global_params, dict):
            for key, value in global_params.items():
                self.__params.setdefault(key, value)
        for key in self.__templete.get_params():
            self.__params.setdefault(key, '')
        return self.__templete.format_templet(self.__params)

    def makedata(self):
        """Return the chart data produced by ``run`` as a JSON string."""
        import json
        from common.MyEncoder import MyEncoder
        return json.dumps(dict(self.__data), ensure_ascii=False, cls=MyEncoder)
Exemple #10
0
class Module(object):
    """Report module analysing in/out passenger flow of key stations.

    The stations analysed are looked up per city in an internal table.
    """

    def __init__(self):
        from common.TempletLoader import TempletLoader
        self.__templete = TempletLoader('templets/module_inout_analize.txt')
        # Stations to analyse, keyed by city name.
        self.__city_dict = {'广州': ['机场南', '广州东站'], }
        self.__params = {}
        self.__data = {}
        self.name = "module_inout_analize"

    def run(self, df, global_params=None):
        """Compute daily entry/exit counts and top destinations per station.

        Args:
            df: DataFrame with ``order_status``, ``ticket_num``,
                ``entry_date``, ``entry_station`` and ``exit_station``.
            global_params: optional dict shared between modules. Its
                ``city`` entry (default '广州') selects the stations; it
                receives ``stations_all``, ``stations[i]`` and
                ``st_i_wk_top3``. A fresh dict is used if omitted.

        Raises:
            KeyError: if the city is not in the internal station table.
        """
        if global_params is None:
            global_params = {}

        # order_status == 5 marks a successful transaction. Copy so the
        # derived column is not written onto a slice of the caller's frame.
        columns = ['ticket_num', 'entry_date', 'entry_station', 'exit_station']
        df_suc = df.loc[df.order_status == 5, columns].copy()
        # str() makes this work for both datetime and text entry dates.
        df_suc['date'] = df_suc.entry_date.map(lambda x: str(x)[:10])
        stations = self.__city_dict[global_params.get('city', '广州')]

        def as_pair(series):
            # Chart format: [labels, values].
            return [series.index.tolist(), series.tolist()]

        entry_day_nums = {}
        exit_day_nums = {}
        # NOTE(review): named "weekend" but computed over all days, as in
        # the original code - confirm whether a weekend filter is intended.
        weekend_exits = {}
        for station in stations:
            departures = df_suc[df_suc.entry_station == station]
            arrivals = df_suc[df_suc.exit_station == station]
            # Daily number of tickets entering / exiting at the station.
            entry_day_nums[station] = departures.groupby(
                'date').ticket_num.sum().sort_index()
            exit_day_nums[station] = arrivals.groupby(
                'date').ticket_num.sum().sort_index()
            # Top 20 destinations of passengers entering at the station.
            weekend_exits[station] = departures.groupby(
                'exit_station').ticket_num.sum().sort_values(
                    ascending=False).head(20)

        # Fill the chart data.
        self.__data['stations'] = stations
        self.__data['entry_nums'] = {
            station: as_pair(entry_day_nums[station]) for station in stations}
        self.__data['exit_nums'] = {
            station: as_pair(exit_day_nums[station]) for station in stations}
        self.__data['weekend_exits'] = {
            station: as_pair(weekend_exits[station]) for station in stations}

        # Fill the template parameters, locally and globally.
        self.__params['stations'] = stations
        self.__params['stations_all'] = ','.join(stations)
        global_params['stations_all'] = '、'.join(stations)
        for i, station in enumerate(stations):
            top3 = '、'.join(weekend_exits[station].head(3).index.tolist())
            self.__params['st_%d_wk_top3' % i] = top3
            global_params['stations[%d]' % i] = station
            global_params['st_%d_wk_top3' % i] = top3

    def maketext(self, global_params=None):
        """Render the template text.

        Values stored by this module take priority over ``global_params``;
        template parameters that have no value are filled with ''.
        """
        if isinstance(global_params, dict):
            for key, value in global_params.items():
                self.__params.setdefault(key, value)
        for key in self.__templete.get_params():
            self.__params.setdefault(key, '')
        return self.__templete.format_templet(self.__params)

    def makedata(self):
        """Return the chart data produced by ``run`` as a JSON string."""
        import json
        from common.MyEncoder import MyEncoder
        return json.dumps(dict(self.__data), ensure_ascii=False, cls=MyEncoder)
class Module(object):
    """Report module on hourly passenger flow of the busiest stations.

    ``run`` fills the template parameters and chart data, ``maketext``
    renders the text template and ``makedata`` serialises the chart data.
    """

    def __init__(self):
        from common.TempletLoader import TempletLoader
        self.__templete = TempletLoader('templets/module_peopleflow.txt')
        self.__params = {}
        # Initialised here so makedata() can be called before run().
        self.__data = {}
        self.name = "module_peopleflow"

    def run(self, df, global_params=None):
        """Compute hourly trends for up to three busiest entry stations.

        Also derives the first and last "peak" hour of the busiest
        station, a peak hour being one whose tickets exceed 1.3 times that
        station's hourly mean. Both values are NaN when no hour qualifies.
        Missing ranks (fewer than three stations) get empty trend lists.

        Args:
            df: DataFrame with ``order_status``, ``entry_date``,
                ``entry_station`` and ``ticket_num`` columns.
            global_params: optional dict shared between modules; receives
                the ``M3_*`` text values. A fresh dict is used if omitted.
        """
        if global_params is None:
            global_params = {}

        # order_status == 5 marks a successful transaction.
        df_suc = df[df['order_status'] == 5].copy()
        df_suc['time'] = pd.to_datetime(df_suc['entry_date'],
                                        format='%Y-%m-%d %H:%M:%S')
        df_suc['hour'] = df_suc['time'].dt.hour

        hourly = df_suc.groupby(['entry_station',
                                 'hour']).ticket_num.sum().reset_index()
        starts = df_suc.groupby('entry_station').ticket_num.sum().sort_values(
            ascending=False)
        top_stations = starts.index[:3].tolist()
        per_station = [
            hourly[hourly.entry_station == name] for name in top_stations
        ]

        if per_station:
            busiest = per_station[0]
            # Mask built from the same frame it filters, so no index
            # re-alignment is needed.
            threshold = busiest.ticket_num.mean() * 1.3
            peak = busiest[busiest.ticket_num > threshold]
        else:
            peak = hourly.iloc[0:0]

        self.__params['M3_stations'] = top_stations
        self.__params['M3_station0_t1'] = peak['hour'].min()
        self.__params['M3_station0_t2'] = peak['hour'].max()

        params = {'M3_stations': top_stations}
        for rank in range(3):
            if rank < len(per_station):
                hours = per_station[rank]['hour'].tolist()
                tickets = per_station[rank]['ticket_num'].tolist()
            else:
                # Fewer than three stations: keep the keys, no data.
                hours, tickets = [], []
            params['M3_stations_trend%d_time' % (rank + 1)] = hours
            params['M3_stations_trend%d' % (rank + 1)] = tickets

        self.__data = params
        global_params['M3_station0_t1'] = self.__params['M3_station0_t1']
        global_params['M3_station0_t2'] = self.__params['M3_station0_t2']
        for i, name in enumerate(top_stations):
            global_params['M3_stations[%d]' % i] = name

    def maketext(self, global_params=None):
        """Render the template text.

        Values stored by this module take priority over ``global_params``;
        template parameters that have no value are filled with ''.
        """
        if isinstance(global_params, dict):
            for key, value in global_params.items():
                self.__params.setdefault(key, value)
        for key in self.__templete.get_params():
            self.__params.setdefault(key, '')
        return self.__templete.format_templet(self.__params)

    def makedata(self):
        """Return the chart data produced by ``run`` as a JSON string."""
        import json
        from common.MyEncoder import MyEncoder
        return json.dumps(dict(self.__data), ensure_ascii=False, cls=MyEncoder)
class Module(object):
    """Report module on payment types and order sources of busy stations.

    ``run`` fills the template parameters and chart data, ``maketext``
    renders the text template and ``makedata`` serialises the chart data.
    """

    def __init__(self):
        from common.TempletLoader import TempletLoader
        self.__templete = TempletLoader('templets/module_ticketway.txt')
        self.__params = {}
        # Initialised here so makedata() can be called before run().
        self.__data = {}
        self.name = "module_ticketway"

    def run(self, df, global_params=None):
        """Compute order shares by source and by payment type.

        Only successful orders from the ten entry stations with the most
        tickets are considered. Missing payment types count as code 0 and
        missing sources as code 5.

        Args:
            df: DataFrame with ``order_status``, ``entry_station``,
                ``ticket_num``, ``payment_type`` and ``source`` columns.
            global_params: optional dict shared between modules; receives
                the ``M4_*`` text values for the top two entries of each
                ranking. A fresh dict is used if omitted.
        """
        if global_params is None:
            global_params = {}

        # Code -> label tables for the payment_type and source columns.
        payment_type_dict = {
            0: '支付宝',
            1: '中移动',
            2: '支付宝网上购票',
            3: '微信支付',
            4: '微信扫码支付',
            5: '翼支付',
            6: '支付宝网页支付',
            7: '微信公众号支付',
            8: '首信易支付',
            9: '中移动WAP支付',
            10: '银联支付',
            11: '银联支付',
            12: '微信小程序支付'
        }
        source_dict = {
            1: '盘缠ios',
            2: '盘缠android',
            3: '插件ios',
            4: '插件android',
            5: 'h5公众号或扫码支付',
            6: '非闪客蜂公众号',
            7: '咖啡',
            8: '长沙ios',
            9: '长沙android'
        }

        # order_status == 5 marks a successful transaction.
        df_suc = df[df['order_status'] == 5].copy()
        starts = df_suc.groupby('entry_station').ticket_num.sum().sort_values(
            ascending=False)
        top_stations = starts.index[:10].tolist()

        df_suc['payment_type'] = df_suc['payment_type'].fillna(0).astype(
            int).map(payment_type_dict)
        df_suc['source'] = df_suc['source'].fillna(5).astype(int).map(
            source_dict)
        df_suc = df_suc[df_suc.entry_station.isin(top_stations)]
        total = df_suc.shape[0]

        # Share of orders per label, largest first.
        payment_share = df_suc.groupby('payment_type').ticket_num.count(
        ).sort_values(ascending=False) / total
        source_share = df_suc.groupby('source').ticket_num.count(
        ).sort_values(ascending=False) / total

        # NOTE(review): as in the original code, the "methods" keys hold
        # the order-source ranking and the "source" keys hold the
        # payment-type ranking - confirm against the template wording.
        params = {
            'M4_top_methods': source_share.index.tolist(),
            'M4_top_perc': source_share.tolist(),
            'M4_top_source': payment_share.index.tolist(),
            'M4_top_perc_source': payment_share.tolist(),
        }
        self.__params['M4_stations'] = top_stations
        self.__params.update(params)
        self.__data = params

        global_params['M4_stations'] = top_stations
        rankings = (('M4_top_methods', 'M4_top_perc'),
                    ('M4_top_source', 'M4_top_perc_source'))
        for names_key, perc_key in rankings:
            # Publish at most the top two entries; fewer if data is sparse.
            top_two = zip(params[names_key][:2], params[perc_key][:2])
            for i, (label, share) in enumerate(top_two):
                global_params['%s[%d]' % (names_key, i)] = label
                global_params['%s[%d]' % (perc_key, i)] = '%.2f' % (
                    share * 100)

    def maketext(self, global_params=None):
        """Render the template text.

        Values stored by this module take priority over ``global_params``;
        template parameters that have no value are filled with ''.
        """
        if isinstance(global_params, dict):
            for key, value in global_params.items():
                self.__params.setdefault(key, value)
        for key in self.__templete.get_params():
            self.__params.setdefault(key, '')
        return self.__templete.format_templet(self.__params)

    def makedata(self):
        """Return the chart data produced by ``run`` as a JSON string."""
        import json
        from common.MyEncoder import MyEncoder
        return json.dumps(dict(self.__data), ensure_ascii=False, cls=MyEncoder)
class Module(object):
    """Report module ``module_details``: per-user retention and usage statistics.

    ``run`` derives the ``D_*`` statistics from the order table, ``maketext``
    renders them through the module template and ``makedata`` serialises the
    chart data as JSON.
    """

    def __init__(self):
        from common.TempletLoader import TempletLoader
        self.__templete = TempletLoader('templets/module_details.txt')
        self.__params = {}
        # Created up front so makedata() does not fail if run() was skipped.
        self.__data = {}
        self.name = "module_details"

    def run(self, df, global_params=None):
        """Compute the ``D_*`` statistics from the successful orders in *df*.

        Args:
            df: Order table. The columns ``order_status``, ``reg_date`` and
                ``owner_id`` are read here; ``reg_date`` entries must expose
                ``.year``/``.month``/``.day``/``.hour`` and stringify as
                ``YYYY-mm-dd HH:MM:SS``.
            global_params: Optional dict shared between modules; receives
                ``day_num`` and ``month_num``. A fresh dict is used when
                omitted (previously ``None`` crashed at the end of the run).

        The statistics themselves are produced by helper functions defined
        elsewhere in the module (``month_diff``, ``day_ratio``,
        ``longest_used_days``, ``user_model`` ...).
        """
        if global_params is None:
            global_params = {}

        # order_status == 5 marks a successful transaction; rows without a
        # registration date cannot be placed on the timeline and are dropped.
        usable = (df['order_status'] == 5) & ~pd.isnull(df['reg_date'])
        df_suc = df[usable].copy()
        df_suc['datetime'] = pd.to_datetime(df_suc['reg_date'],
                                            format='%Y-%m-%d %H:%M:%S')
        df_suc['year'] = df_suc['reg_date'].map(lambda x: x.year)
        df_suc['month'] = df_suc['reg_date'].map(lambda x: x.month)
        df_suc['day'] = df_suc['reg_date'].map(lambda x: x.day)
        df_suc['hour'] = df_suc['reg_date'].map(lambda x: x.hour)
        # From here on reg_date is a string, sliced for month/day below.
        df_suc['reg_date'] = df_suc['reg_date'].astype(str)
        df_suc['reg_month'] = df_suc['reg_date'].map(lambda x: x[0:7])
        df_suc['reg_day'] = df_suc['reg_date'].map(lambda x: int(x[8:10]))
        df_suc['dayofweek'] = df_suc['datetime'].apply(lambda x: x.dayofweek)

        # Attach each user's first and last usage time to every row.
        single_ft = df_suc.groupby(['owner_id'
                                    ])['reg_date'].min().reset_index()
        single_ft = single_ft.rename(index=str,
                                     columns={'reg_date': 'first_time'})
        df_suc = df_suc.merge(single_ft, on=['owner_id'], how='left')
        df_suc = df_suc[~pd.isnull(df_suc.first_time)]
        df_suc['first_time'] = df_suc['first_time'].astype(str)
        single_ft = df_suc.groupby('owner_id')['reg_date'].max().reset_index()
        single_ft = single_ft.rename(index=str,
                                     columns={'reg_date': 'last_time'})
        df_suc = df_suc.merge(single_ft, on=['owner_id'], how='left')
        df_suc = df_suc[~pd.isnull(df_suc.last_time)]
        df_suc['last_time'] = df_suc['last_time'].astype(str)

        # day_interval: whole days between a row's date and the user's first use.
        df_suc['first_month'] = df_suc['first_time'].map(lambda x: x[0:7])
        df_suc['first_date_obj'] = df_suc['first_time'].map(
            lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S').date())
        df_suc['reg_date_obj'] = df_suc['reg_date'].map(
            lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S').date())
        df_suc['tmp'] = df_suc['reg_date_obj'] - df_suc['first_date_obj']
        df_suc['day_interval'] = df_suc['tmp'].map(lambda x: x.days)
        df_suc.drop(['first_date_obj', 'reg_date_obj', 'tmp'],
                    axis=1,
                    inplace=True)

        params = {}
        params['D_new_cont_month_fm'] = {}

        # The result was never used; the call is kept in case the helper
        # mutates df_suc -- TODO confirm and drop if it is pure.
        used_month_count(df_suc)

        # Defined up front so the assignments below cannot hit a NameError.
        D_new_cont_month = []
        D_new_cont_month_people = []
        D_new_cont_month_ratio = []
        fm = df_suc.reg_date.min()[0:7]  # earliest month present, 'YYYY-mm'
        if fm <= df_suc.reg_date.max()[0:7]:
            total_people = 0
            for i in range(month_diff(fm) - 1):
                D_new_cont_month.append(i + 1)
                people, total_people = used_next_month_count(
                    df_suc, fm, i + 1, has_used_in_all_next_months)
                D_new_cont_month_people.append(people)
            # The +1 in the denominator is kept from the original; it also
            # avoids dividing by zero when total_people is 0.
            D_new_cont_month_ratio = list(
                np.array(D_new_cont_month_people) / (total_people + 1))
            params['D_new_cont_month_fm'][fm] = dict(
                zip(D_new_cont_month, D_new_cont_month_ratio))

        # Users grouped by the number of distinct months they were active in.
        D_total_month = list(range(1, month_diff(fm)))
        tmp_df = df_suc.groupby(['owner_id']).reg_month.unique().reset_index()
        tmp_df['unique_month'] = tmp_df['reg_month'].apply(lambda x: len(x))
        D_total_month_people = tmp_df.groupby(
            ['unique_month']).owner_id.count().reset_index().owner_id.tolist()
        D_total_month = D_total_month[:len(D_total_month_people)]
        D_total_month_ratio = list(
            np.array(D_total_month_people) / sum(D_total_month_people))

        # user_used_days_qr: every day offset on which a user was active
        #   (the first use is day 0).
        # user_continue_day_count_qr: each user's longest run of consecutive days.
        # user_used_days_qr also yields the span between first and last use and
        # the usage frequency (active days / span).
        user_used_days_qr, user_continue_day_count_qr = longest_used_days(
            df_suc[['owner_id', 'day_interval']])

        user_total = len(df_suc.owner_id.unique())

        # Number of users per longest-streak length.
        user_continue_day_count_num_qr = dict()
        for v in user_continue_day_count_qr.values():
            user_continue_day_count_num_qr[
                v] = user_continue_day_count_num_qr.get(v, 0) + 1
        continue_day_count_df_qr = pd.DataFrame({
            'continue_day_count': list(user_continue_day_count_num_qr.keys()),
            'num': list(user_continue_day_count_num_qr.values()),
        })

        # Buckets match the D_cont_day labels below.
        cont_day_buckets = [(1, 1), (2, 3), (4, 7), (8, 20), (21, 999)]
        D_cont_day_people = [
            day_ratio(continue_day_count_df_qr, low, high,
                      'continue_day_count')
            for low, high in cont_day_buckets
        ]
        D_cont_day_ratio = list(np.array(D_cont_day_people) / user_total)

        # Number of users per total span in days (last use - first use + 1).
        user_day_count_num_qr = dict()
        for days_used in user_used_days_qr.values():
            v = days_used[-1] + 1
            user_day_count_num_qr[v] = user_day_count_num_qr.get(v, 0) + 1
        day_count_df_qr = pd.DataFrame({
            'day_count': list(user_day_count_num_qr.keys()),
            'num': list(user_day_count_num_qr.values()),
        })

        # Buckets match the D_total_day labels below. The last bucket used to
        # start at 120, overlapping 91~120 although its label reads "121+".
        # NOTE(review): assumes day_ratio treats both bounds as inclusive, as
        # the (1, 1) bucket implies -- confirm against the helper.
        total_day_buckets = [(1, 1), (2, 5), (6, 20), (21, 30), (31, 60),
                             (61, 90), (91, 120), (121, 999)]
        D_total_day_people = [
            day_ratio(day_count_df_qr, low, high, 'day_count')
            for low, high in total_day_buckets
        ]
        D_total_day_ratio = list(np.array(D_total_day_people) / user_total)

        # Usage frequency histogram: key N counts users whose ratio is in
        # ((N-10)/100, N/100]; 100 = ratio above 0.9 with several active days,
        # 101 = ratio above 0.9 with a single active day.
        user_ratio_num_qr = {
            10: 0,
            20: 0,
            30: 0,
            40: 0,
            50: 0,
            60: 0,
            70: 0,
            80: 0,
            90: 0,
            100: 0,
            101: 0
        }
        ratio_bounds = [(0.1, 10), (0.2, 20), (0.3, 30), (0.4, 40), (0.5, 50),
                        (0.6, 60), (0.7, 70), (0.8, 80), (0.9, 90)]
        for days_used in user_used_days_qr.values():
            n = len(days_used)
            last_day = days_used[-1] + 1
            ratio = n / last_day
            for upper, bucket in ratio_bounds:
                if ratio <= upper:
                    user_ratio_num_qr[bucket] += 1
                    break
            else:
                user_ratio_num_qr[100 if n > 1 else 101] += 1
        # Move the single-use bucket (101) to the front to match D_user_pre.
        tmp_list = list(user_ratio_num_qr.values())
        D_user_ratio_people = [tmp_list[-1]] + tmp_list[:-1]
        D_user_ratio_ratio = list(np.array(D_user_ratio_people) / user_total)

        # Classify every user into one of the D_model behaviour types.
        user_used_month, user_used_mcount = user_months_and_count(df_suc)
        user_mw_dict = user_month_week_count(df_suc)
        user_ft_lt_dict = user_ft_lt(df_suc)
        user_ww_dict = user_work_week_num(df_suc)
        u_ids = []
        u_mds = []
        for user_id in df_suc['owner_id'].unique():
            u_ids.append(user_id)
            u_mds.append(
                user_model(df_suc, user_id, user_used_month, user_used_mcount,
                           user_mw_dict, user_ft_lt_dict, user_ww_dict))
        user_model_df = pd.DataFrame({'owner_id': u_ids, "model": u_mds})
        D_model_people, D_model_ratio = user_model_count(user_model_df)

        self.__params['D_new_cont_month'] = D_new_cont_month
        self.__params['D_new_cont_month_people'] = D_new_cont_month_people
        self.__params['D_new_cont_month_ratio'] = D_new_cont_month_ratio
        self.__params['D_total_month'] = D_total_month
        self.__params['D_total_month_people'] = D_total_month_people
        self.__params['D_total_month_ratio'] = D_total_month_ratio
        self.__params['D_cont_day'] = [
            '连续使用1天', '连续使用2~3天', '连续使用4~7天', '连续使用8~20天', '连续使用21天以上'
        ]
        self.__params['D_cont_day_people'] = D_cont_day_people
        self.__params['D_cont_day_ratio'] = D_cont_day_ratio
        self.__params['D_total_day'] = [
            '总使用1天', '总使用2~5天', '总使用6~20天', '总使用21~30天', '总使用31~60天',
            '总使用61~90天', '总使用91~120天', '总使用121天以上'
        ]
        self.__params['D_total_day_people'] = D_total_day_people
        self.__params['D_total_day_ratio'] = D_total_day_ratio
        self.__params['D_user_pre'] = [
            '只使用1次', '0~0.1', '0.1~0.2', '0.2~0.3', '0.3~0.4', '0.4~0.5',
            '0.5~0.6', '0.6~0.7', '0.7~0.8', '0.8~0.9', '0.9~1'
        ]
        self.__params['D_user_pre_people'] = D_user_ratio_people
        self.__params['D_user_pre_ratio'] = D_user_ratio_ratio
        self.__params['D_model'] = [
            '稳定高频周末型', '稳定高频工作型', '稳定高频常用型', '稳定低频周末型', '稳定低频工作型', '稳定低频常用型',
            '一段时间之后不用型', '突发周末型', '突发工作型', '突发常用型', '本月新用户周末型', '本月新用户工作型',
            '本月新用户常用型'
        ]
        self.__params['D_model_people'] = D_model_people
        self.__params['D_model_ratio'] = D_model_ratio

        # Chart data: for every label list, map label -> ratio. Only the label
        # keys written above are exported, so unrelated entries in __params
        # (e.g. merged globals from an earlier maketext call) cannot break it.
        label_keys = ('D_new_cont_month', 'D_total_month', 'D_cont_day',
                      'D_total_day', 'D_user_pre', 'D_model')
        for k in label_keys:
            ratios = self.__params[k + '_ratio']
            params[k] = {}
            for i, label in enumerate(self.__params[k]):
                params[k][label] = ratios[i]
        params['D_model_people'] = dict(
            zip(self.__params['D_model'], self.__params['D_model_people']))
        self.__data = params
        global_params['day_num'] = 30
        global_params['month_num'] = 3

    def maketext(self, global_params=None):
        """Render the module template.

        Global parameters (any dict) are merged in without overriding values
        computed by this module; template variables that remain undefined are
        rendered as empty strings.
        """
        if global_params and isinstance(global_params, dict):
            for key, value in global_params.items():
                self.__params.setdefault(key, value)
        for key in self.__templete.get_params():
            self.__params.setdefault(key, '')
        return self.__templete.format_templet(self.__params)

    def makedata(self):
        """Return the chart data collected by ``run`` as a JSON string."""
        # Imported locally, like the sibling modules do, so this method does
        # not depend on a module-level ``import json``.
        import json
        from common.MyEncoder import MyEncoder
        return json.dumps(dict(self.__data), ensure_ascii=False, cls=MyEncoder)
Exemple #14
0
 def __init__(self):
     """Load the ``module1`` template and start with no template parameters."""
     from common.TempletLoader import TempletLoader as _Loader
     template_path = 'templets/module1.txt'
     self.__templete = _Loader(template_path)
     self.__params = dict()
 def __init__(self):
     """Load the ``module0`` template and create empty parameter/data stores."""
     from common.TempletLoader import TempletLoader as _Loader
     template_path = 'templets/module0.txt'
     self.__templete = _Loader(template_path)
     # Template variables and chart data both start out empty.
     self.__params = dict()
     self.__data = dict()
     self.name = "module_newuseranalize"
Exemple #16
0
 def __init__(self):
     """Load the ``module0`` template and blank out all of its variables."""
     from common.TempletLoader import TempletLoader as _Loader
     self.__templete = _Loader('templets/module0.txt')
     # Every variable the template declares starts as an empty string.
     self.__params = dict.fromkeys(self.__templete.get_params(), '')
Exemple #17
0
class Module(object):
    """Report module ``module_workholi_cmp``: hourly ridership, workdays vs. weekends.

    ``run`` builds per-hour ticket series for the whole network and for a few
    configured stations, ``maketext`` renders the template and ``makedata``
    serialises the series as JSON.
    """

    def __init__(self):
        from common.TempletLoader import TempletLoader
        self.__templete = TempletLoader('templets/module_workholi_cmp.txt')
        # Stations that get their own hourly series, keyed by city name.
        self.__city_dict = {
            '广州': ['机场南', '广州东站'],
        }
        self.name = "module_workholi_cmp"
        self.__params = {}
        self.__data = {}

    @staticmethod
    def _hourly_mean(frame, days):
        """Return tickets per hour of day in *frame*, divided by *days* and rounded to 3 places."""
        return (frame.groupby('hour').ticket_num.sum().sort_index() /
                days).map(lambda x: round(x, 3))

    def run(self, df, global_params=None):
        """Compute the workday/weekend hourly series from successful orders.

        Args:
            df: Order table with ``order_status``, ``ticket_num``,
                ``entry_date`` (timestamps), ``entry_station`` and
                ``exit_station`` columns.
            global_params: Optional shared dict. ``city`` (default ``'广州'``)
                selects the highlighted stations; the four
                ``*_seq_maxhour`` / ``*_seq_maxfluency`` values are written
                back into it.
        """
        if global_params is None:
            global_params = {}
        # order_status == 5 marks a successful transaction. Copy so that the
        # derived columns are not written onto a slice of the caller's frame.
        df_suc = df[df.order_status == 5][[
            'ticket_num', 'entry_date', 'entry_station', 'exit_station'
        ]].copy()
        df_suc['weekday'] = df_suc.entry_date.map(lambda x: x.weekday())
        df_suc['hour'] = df_suc.entry_date.map(lambda x: x.hour)

        # Covered period in whole days (rounded to nearest). Clamped to 1 so
        # data spanning less than half a day does not divide by zero.
        span = df_suc.entry_date.max() - df_suc.entry_date.min()
        days = max(int(round(span.total_seconds() / 86400.0)), 1)

        is_workday = df_suc.weekday < 5
        is_holiday = df_suc.weekday >= 5
        # Both series are divided by the full period length, as before.
        workday_seq = self._hourly_mean(df_suc[is_workday], days)
        holiday_seq = self._hourly_mean(df_suc[is_holiday], days)
        # idxmax() returns the hour *label*; argmax() is positional on
        # pandas >= 1.0 and gives the wrong hour when some hours are missing.
        self.__params['workday_seq_maxhour'] = workday_seq.idxmax()
        self.__params['workday_seq_maxfluency'] = int(workday_seq.max())
        self.__params['holiday_seq_maxhour'] = holiday_seq.idxmax()
        self.__params['holiday_seq_maxfluency'] = int(holiday_seq.max())

        # Cities without configured stations simply get no per-station series.
        stations = self.__city_dict.get(global_params.get('city', '广州'), [])
        workday_seqs = {}
        holiday_seqs = {}
        for station in stations:
            at_station = df_suc.entry_station == station
            workday_seqs[station] = self._hourly_mean(
                df_suc[at_station & is_workday], days)
            holiday_seqs[station] = self._hourly_mean(
                df_suc[at_station & is_holiday], days)

        # Chart data: every series is stored as [hours, values].
        self.__data['workday_full_seq'] = [
            workday_seq.index.tolist(),
            workday_seq.tolist()
        ]
        self.__data['holiday_full_seq'] = [
            holiday_seq.index.tolist(),
            holiday_seq.tolist()
        ]
        self.__data['stations'] = stations
        self.__data['station_workday_seqs'] = {
            station: [
                workday_seqs[station].index.tolist(),
                workday_seqs[station].tolist()
            ]
            for station in stations
        }
        self.__data['station_holiday_seqs'] = {
            station: [
                holiday_seqs[station].index.tolist(),
                holiday_seqs[station].tolist()
            ]
            for station in stations
        }

        # Publish the peak figures for other modules.
        for key in ('workday_seq_maxhour', 'workday_seq_maxfluency',
                    'holiday_seq_maxhour', 'holiday_seq_maxfluency'):
            global_params[key] = self.__params[key]

    def maketext(self, global_params=None):
        """Render the module template.

        Global parameters (any dict) are merged in without overriding values
        computed by this module; template variables that remain undefined are
        rendered as empty strings.
        """
        if global_params and isinstance(global_params, dict):
            for key, value in global_params.items():
                self.__params.setdefault(key, value)
        for key in self.__templete.get_params():
            self.__params.setdefault(key, '')
        return self.__templete.format_templet(self.__params)

    def makedata(self):
        """Return the chart data collected by ``run`` as a JSON string."""
        import json
        from common.MyEncoder import MyEncoder
        return json.dumps(dict(self.__data), ensure_ascii=False, cls=MyEncoder)
 def __init__(self):
     """Load the ``module_peopleflow`` template and start with no parameters."""
     from common.TempletLoader import TempletLoader as _Loader
     template_path = 'templets/module_peopleflow.txt'
     self.__templete = _Loader(template_path)
     self.__params = dict()
     self.name = "module_peopleflow"
class Module(object):
    """Report module ``module_jamanalize``: congestion estimate per route segment.

    Ticket counts between station pairs are spread over the segments of a
    pre-computed route table, and each segment is ranked into five load
    levels (0 = lightest, 4 = busiest) for workday/weekend mornings and
    evenings.
    """

    def __init__(self):
        from common.TempletLoader import TempletLoader
        self.__templete = TempletLoader('templets/module_jamanalize.txt')
        self.__params = {}
        self.__data = {}
        self.name = "module_jamanalize"
        # Must have exactly two entries: the morning and the evening commute
        # window, each as (first_hour, last_hour), both inclusive.
        self.__time_period = [(7, 9), (17, 19)]

    @staticmethod
    def _route_flow(od_rows, routes):
        """Spread ticket counts of station pairs over the segments they use.

        Args:
            od_rows: Rows of ``(entry_station, exit_station, tickets)``.
            routes: Mapping ``(entry, exit) -> iterable of segments``.

        Returns:
            ``(flow, missing)``: tickets per segment, and the number of
            station pairs that had no entry in *routes* and were skipped.
        """
        flow = {}
        missing = 0
        for row in od_rows:
            if row[0] == row[1]:
                continue
            try:
                paths = routes[(row[0], row[1])]
            except KeyError:
                missing += 1
                continue
            for path in paths:
                flow[path] = flow.get(path, 0) + row[2]
        return flow, missing

    @staticmethod
    def _busy_routes(level_rows):
        """Format the first three rows as ``start-end`` names joined by '、'."""
        return '、'.join('-'.join([str(x) for x in each[:2]])
                        for each in level_rows[:3])

    def run(self, df, global_params=None):
        """Rank route segments by load for each day type and commute window.

        Does nothing when ``routes/<city>_route.pk`` does not exist.

        Args:
            df: Order table with ``order_status``, ``entry_date``
                (timestamps), ``entry_station``, ``exit_station`` and
                ``ticket_num`` columns.
            global_params: Optional shared dict. ``city`` (default ``'广州'``)
                selects the route file; the ``M7_*`` summary values are
                written back into it.
        """
        import os
        import pickle as pk
        import pandas as pd
        if global_params is None:
            global_params = {}
        # The stored shortest paths serve as the predicted route of each trip.
        city = global_params.get('city', '广州')
        route_file = 'routes/%s_route.pk' % city
        if not os.path.exists(route_file):
            return
        # NOTE(review): unpickling executes code embedded in the file; the
        # route files must only ever come from a trusted source.
        with open(route_file, 'rb') as fh:
            routes = pk.load(fh)

        # order_status == 5 marks a successful transaction.
        df_suc = df[df.order_status == 5].copy()
        df_suc['weekday'] = df_suc.entry_date.map(lambda x: x.weekday())
        df_suc['hour'] = df_suc.entry_date.map(lambda x: x.hour)
        day_masks = (('workday', df_suc.weekday < 5),
                     ('holiday', df_suc.weekday >= 5))
        time_windows = (('morning', self.__time_period[0]),
                        ('evening', self.__time_period[1]))

        error_counts = 0
        for d, day_mask in day_masks:
            for t, (first_hour, last_hour) in time_windows:
                key = '%s_%s_jam_routes' % (d, t)
                # Every slot always has all five levels, so the summary below
                # never hits a missing key even if this slot fails.
                self.__data[key] = {jam_lv: [] for jam_lv in range(5)}
                try:
                    in_window = (day_mask & (df_suc.hour >= first_hour)
                                 & (df_suc.hour <= last_hour))
                    od_rows = df_suc[in_window].groupby(
                        ['entry_station', 'exit_station'
                         ]).ticket_num.sum().reset_index().values
                    flow, missing = self._route_flow(od_rows, routes)
                    error_counts += missing
                    if not flow:
                        # No traffic in this slot: nothing to rank.
                        continue
                    res = pd.DataFrame([{
                        'start': each[0],
                        'end': each[1],
                        'line': each[2],
                        'fluency': flow[each]
                    } for each in flow]).sort_values('fluency',
                                                     ascending=False)
                    # Five equal-width load bins over the observed range.
                    res['level'] = pd.cut(res['fluency'], 5, labels=False)
                    busiest = res[res.level == 4][[
                        'start', 'end', 'line', 'fluency'
                    ]].astype(str).values
                    self.__params[key] = '\n'.join(
                        [','.join(each) for each in busiest])
                    for jam_lv in range(5):
                        self.__data[key][jam_lv] = res[res.level == jam_lv][[
                            'start', 'end', 'level'
                        ]].values.tolist()
                except Exception as exc:
                    # Best effort, as before: a broken slot is counted and
                    # reported instead of aborting the whole report.
                    error_counts += 1
                    print('----Jam Analize failed for %s: %r' % (key, exc))
        print('----Jam Analize Finished. %d error meet.' % error_counts)

        morning_top = self.__data['workday_morning_jam_routes'][4]
        evening_top = self.__data['workday_evening_jam_routes'][4]
        route_total = len(routes) or 1  # avoid dividing by zero
        global_params['M7_AM_busy_routes'] = self._busy_routes(morning_top)
        global_params['M7_AM_lv4_count'] = len(morning_top)
        global_params['M7_AM_lv4_rate'] = '%.2f' % (
            global_params['M7_AM_lv4_count'] / route_total * 100)
        global_params['M7_PM_busy_routes'] = self._busy_routes(evening_top)
        global_params['M7_PM_lv4_count'] = len(evening_top)
        global_params['M7_PM_lv4_rate'] = '%.2f' % (
            global_params['M7_PM_lv4_count'] / route_total * 100)

    def maketext(self, global_params=None):
        """Render the module template.

        Global parameters (any dict) are merged in without overriding values
        computed by this module; template variables that remain undefined are
        rendered as empty strings.
        """
        if global_params and isinstance(global_params, dict):
            for key, value in global_params.items():
                self.__params.setdefault(key, value)
        for key in self.__templete.get_params():
            self.__params.setdefault(key, '')
        return self.__templete.format_templet(self.__params)

    def makedata(self):
        """Return the chart data collected by ``run`` as a JSON string."""
        import json
        from common.MyEncoder import MyEncoder
        return json.dumps(dict(self.__data), ensure_ascii=False, cls=MyEncoder)
Exemple #20
0
 def __init__(self):
     """Load the ``module_ticketrate`` template and start with no parameters."""
     from common.TempletLoader import TempletLoader as _Loader
     template_path = 'templets/module_ticketrate.txt'
     self.__templete = _Loader(template_path)
     self.__params = dict()
     self.name = "module_ticketrate"
class Module(object):
    """Report module ``module_dataanalize``: headline order and ticket counts."""

    def __init__(self):
        from common.TempletLoader import TempletLoader
        self.__templete = TempletLoader('templets/module_dataanalize.txt')
        self.name = "module_dataanalize"
        self.__params = {}
        self.__data = {}

    def run(self, df, global_params=None):
        """Count orders and tickets by order status.

        Args:
            df: Order table; only the ``order_status`` and ``ticket_num``
                columns are read.
            global_params: Accepted for interface parity with the other
                modules; this module neither reads nor writes it.

        The same seven values are stored both as template parameters and as
        chart data.
        """
        # Orders and tickets per order status.
        order_status = df.order_status.value_counts().to_dict()
        order_status_tk = df.groupby('order_status').ticket_num.sum().to_dict()

        stats = {
            # Total number of orders.
            'order_num': df.shape[0],
            # Status 5: orders whose tickets were issued.
            'order_use_num': order_status.get(5, 0),
            # Status 1 is reported as "not paid".
            'order_notpay': order_status.get(1, 0),
            # Status 2: paid, but the ticket was never collected.
            'order_pay_not_use_num': order_status.get(2, 0),
            # Statuses 3, 6 and 7 together count as invalid orders.
            'order_fail_num': sum(
                order_status.get(code, 0) for code in (3, 6, 7)),
            # Tickets over all orders, and tickets actually issued.
            'ticket_num': sum(order_status_tk.values()),
            'ticket_use_num': order_status_tk.get(5, 0),
        }
        # The station / station-code counts that used to be computed here were
        # never used and only made the run fail on tables without the
        # *_station_code columns, so they were dropped.
        self.__params.update(stats)
        self.__data.update(stats)

    def maketext(self, global_params=None):
        """Render the module template.

        Global parameters (any dict) are merged in without overriding values
        computed by this module; template variables that remain undefined are
        rendered as empty strings.
        """
        if global_params and isinstance(global_params, dict):
            for key, value in global_params.items():
                self.__params.setdefault(key, value)
        for key in self.__templete.get_params():
            self.__params.setdefault(key, '')
        return self.__templete.format_templet(self.__params)

    def makedata(self):
        """Return the chart data collected by ``run`` as a JSON string."""
        import json
        from common.MyEncoder import MyEncoder
        return json.dumps(dict(self.__data), ensure_ascii=False, cls=MyEncoder)