예제 #1
0
    def merge_json(self):
        """Combine the per-section JSON reports into a single ``main.json``.

        Every section is read with ``load_json`` and stored in the merged
        document under its section name. The merged document is written
        first; the per-section files under ``json/`` are removed only
        afterwards, so a failed write does not destroy the intermediate
        results.
        """
        # Section names double as keys of the merged document and as the
        # base names of the files that are read and then removed.
        sections = (
            'trip_analysis',
            'area_analysis',
            'basic_info',
            'monthly_consumption',
            'user_portrait',
            'call_log',
            'head_info',
        )

        main_data = {name: load_json(name + '.json') for name in sections}

        # Persist the merged report before deleting its inputs.
        dump_json('main.json', main_data)

        for name in sections:
            os.remove('json/' + name + '.json')

        print('数据分析完成')
예제 #2
0
    def monthly_consumption(self):
        """Build per-month usage records and write ``monthly_consumption.json``.

        The monthly spending list is taken out of ``basic_info.json`` (which
        is rewritten without it) and paired by position with
        ``self.month_list``. Calls and messages are then folded into the
        record whose ``month`` (``YYYY-MM``) matches their timestamp prefix.
        """
        basic = load_json('basic_info.json')

        spent_per_month = basic['monthly_consumption']
        # NOTE(review): -1 is prepended so the first month in month_list gets
        # a placeholder amount -- presumably the current, not-yet-billed
        # month; confirm against how month_list is built.
        spent_per_month.insert(0, -1)

        records = [
            {
                "call_cnt": 0,
                'call_consumption': 0.00,
                "call_seconds": 0,
                "called_cnt": 0,
                "called_seconds": 0,
                "month": month.strftime('%Y-%m'),
                "msg_cnt": 0,
                "receive_cnt": 0,
                "send_cnt": 0,
                "talk_cnt": 0,
                "talk_seconds": 0,
                "unknown_cnt": 0
            }
            for month in self.month_list
        ]

        for position, record in enumerate(records):
            # Positional pairing: record N takes spending entry N.
            record['call_consumption'] = spent_per_month[position]
            for call in self.call_list:
                if call[2][:7] == record['month']:
                    add_call_detail(record, call)
            for msg in self.msg_list:
                if msg[0][:7] == record['month']:
                    add_msg_detail(record, msg)

        # The spending list now lives in the monthly records only.
        basic.pop('monthly_consumption')

        dump_json('basic_info.json', basic)
        dump_json('monthly_consumption.json', records)
예제 #3
0
    def total_time_rank(self):
        """Rank contacted numbers by total talk time; write ``total_time_rank.json``.

        One entry is created per distinct peer number (``call[5]``) in
        ``self.call_list``. For each call the duration (``call[3]``) and the
        outgoing ('主叫') or incoming ('被叫') counter are added to the entry
        of that call's own number only. Entries are sorted by accumulated
        duration, longest first, and the duration is then converted to
        minutes rounded to two decimals.
        """
        # Keyed by phone number: constant-time lookup, and dict insertion
        # order keeps the first-seen order for equal talk times.
        by_phone = {}

        for call in self.call_list:
            phone = call[5]
            entry = by_phone.get(phone)
            if entry is None:
                entry = by_phone[phone] = {
                    'phone': phone,
                    'talkMinutes': 0,
                    'callCnt': 0,
                    'calledCnt': 0,
                    'phoneLocation': call[7],
                    'identical': False,
                    'phoneInfo': '未知',
                    'phoneLabel': '未知'
                }

            # Durations are accumulated as-is here and divided by 60 after
            # sorting, so 'talkMinutes' temporarily holds the raw total.
            entry['talkMinutes'] += call[3]
            if call[4] == '被叫':
                entry['calledCnt'] += 1
            elif call[4] == '主叫':
                entry['callCnt'] += 1

        # sort by accumulated talk time, longest first
        ranked_list = sorted(by_phone.values(),
                             key=lambda k: k['talkMinutes'],
                             reverse=True)

        # convert the accumulated total to minutes
        for entry in ranked_list:
            entry['talkMinutes'] = round(entry['talkMinutes'] / 60, 2)

        dump_json('total_time_rank.json', ranked_list)
예제 #4
0
    def special_cate(self):
        """Summarise calls to special numbers; write ``special_cate.json``.

        One record is produced for each number in ``numtype`` ('110' and
        '120'). Every call in ``self.call_list`` whose peer number
        (``call[5]``) equals that number is counted in ``talk_cnt`` /
        ``talk_seconds`` and, depending on the direction in ``call[4]``, in
        the outgoing ('主叫') or incoming ('被叫') counters.
        """
        special_cate = []

        numtype = ['110', '120']
        for num in numtype:
            entry = {
                "call_cnt": 0,
                "call_seconds": 0,
                "called_cnt": 0,
                "called_seconds": 0,
                "cate": num,
                "month_detail": [],
                "msg_cnt": 0,
                "phone_detail": [],
                "receive_cnt": 0,
                "send_cnt": 0,
                "talk_cnt": 0,
                "talk_seconds": 0,
                "unknown_cnt": 0
            }

            for call in self.call_list:
                if call[5] != num:
                    continue
                entry['talk_cnt'] += 1
                entry['talk_seconds'] += call[3]
                # Counter names must match the snake_case keys declared in
                # the record above.
                if call[4] == '被叫':
                    entry['called_cnt'] += 1
                    entry['called_seconds'] += call[3]
                elif call[4] == '主叫':
                    entry['call_cnt'] += 1
                    entry['call_seconds'] += call[3]

            special_cate.append(entry)

        dump_json('special_cate.json', special_cate)
예제 #5
0
    def user_info(self):
        """Scrape basic subscriber data with the browser; write ``basic_info.json``.

        Uses ``self.driver`` to visit four pages of iservice.10010.com and
        reads, in order: the number's home location, the ID card / real name /
        registration date, the current balance, and the billed amount of the
        first six entries of the bill-history list. Fixed ``time.sleep``
        pauses give each page time to render before elements are looked up.
        """
        # Number of bill-history entries that are clicked and averaged.
        months_to_read = 6

        userinfo = {
            'operator': "CHINAUNICOM",
            'operator_zh': "中国联通",
            'id_card_check': 0,
            'name_check': 0,
            'if_call_emergency1': 0,
            'if_call_emergency2': 0,
            'monthly_consumption': [],
            'phone_location': '',
            'phone': self.phone,
            'ave_monthly_consumption': ''
        }

        # --- home location of the number ---
        self.driver.get(
            'http://iservice.10010.com/e4/query/others/service_belong.html?menuId=000400010003'
        )
        number_input = self.driver.find_element_by_id('inputNnumber')
        number_input.send_keys(self.phone)
        number_input.send_keys(Keys.RETURN)
        time.sleep(2)
        userinfo['phone_location'] = self.driver.find_element_by_xpath(
            '//*[@id="addr"]').text

        # --- personal information ---
        self.driver.get(
            'http://iservice.10010.com/e4/query/basic/personal_xx.html')
        time.sleep(15)

        # NOTE(review): the [7:18] slice presumably skips a label prefix in
        # the cell text -- confirm against the live page.
        userinfo['id_card'] = self.driver.find_element_by_xpath(
            '//*[@id="userInfocontext"]/div[2]/div[1]/dl[3]/dd').text[7:18]

        userinfo['real_name'] = self.driver.find_element_by_xpath(
            '//*[@id="userInfocontext"]/div[2]/div[1]/dl[1]/dd').text

        reg_date = self.driver.find_element_by_xpath(
            '//*[@id="numberContext"]/div[2]/div[2]/dl[1]/dd').text
        # str.replace returns a new string, so the result has to be stored.
        # The year/month markers become '-' and the day marker is dropped so
        # that no trailing separator is left behind.
        userinfo['reg_date'] = (reg_date.replace('年', '-')
                                .replace('月', '-')
                                .replace('日', ''))

        # --- current balance ---
        self.driver.get(
            'http://iservice.10010.com/e4/query/calls/account_balance.html?menuId=000100010013'
        )
        time.sleep(20)
        # NOTE(review): [:-2] presumably strips a two-character unit suffix
        # before the float conversion -- confirm.
        userinfo['current_balance'] = float(
            self.driver.find_element_by_xpath(
                '//*[@id="userInfoContent"]/dl[2]/dd/i').text[:-2])

        # --- monthly bills ---
        self.driver.get(
            'http://iservice.10010.com/e4/query/basic/history_list.html?menuId=000100020001'
        )
        time.sleep(10)
        total_spending = 0

        for i in range(1, months_to_read + 1):
            self.driver.find_element_by_xpath(
                '//*[@id="score_list_ul"]/li[{}]'.format(i)).click()
            time.sleep(10)

            # The bill table is looked up at the nested path first; if that
            # lookup fails the outer table body is read instead.
            try:
                text = self.driver.find_element_by_xpath(
                    '//*[@id="historylistContext"]/table/tbody/tr/td[1]/table/tbody'
                ).text
            except Exception:
                text = self.driver.find_element_by_xpath(
                    '//*[@id="historylistContext"]/table/tbody').text

            # The last number found in the table text is taken as the amount.
            amount = float(
                re.findall(r"[-+]?\d*\.\d+|\d+", text.rstrip())[-1])
            userinfo['monthly_consumption'].append(amount)
            total_spending += amount

            print(total_spending)
        userinfo['ave_monthly_consumption'] = round(
            total_spending / months_to_read, 2)

        dump_json('basic_info.json', userinfo)
예제 #6
0
    def basic_info(self):
        """Fetch basic subscriber data over HTTP; write ``basic_info.json``.

        Posts with the session ``self.s`` to a series of iservice.10010.com
        endpoints and collects: ID card number and real name, registration
        date (normalised to ``YYYY-MM-DD``), home location, current balance
        and the billed amount of every period listed under
        ``accountPeriod``. Endpoint templates such as ``check_login`` and
        ``queryHistoryBill`` and the ``unix_time`` value come from names
        defined outside this method.
        """
        user_detail = 'http://iservice.10010.com/e3/static/query/searchPerInfoDetail/?_={}'.format(
            unix_time)

        account_balance_page = 'http://iservice.10010.com/e4/query/calls/account_balance.html?menuId=000100010013'
        account_balance = 'http://iservice.10010.com/e3/static/query/accountBalance/search?_={}' \
                          '&accessURL={}&type=onlyAccount'.format(unix_time, account_balance_page)

        basic_info = {
            'operator': "CHINAUNICOM",
            'operator_zh': "中国联通",
            'id_card_check': 0,
            'name_check': 0,
            'if_call_emergency1': 0,
            'if_call_emergency2': 0,
            'monthly_consumption': [],
            'phone_location': '',
            'phone': self.phone,
            'ave_monthly_consumption': ''
        }

        # The response of this request is not used.
        # NOTE(review): presumably it primes server-side session state --
        # confirm before removing.
        self.s.post(
            'http://iservice.10010.com/e3/static/common/info?_={}'.format(
                unix_time))

        r = self.s.post(check_login)
        d = json.loads(r.text)
        basic_info['id_card'] = d['userInfo']['certnum']
        basic_info['real_name'] = d['userInfo']['custName']
        print('获得姓名,身份证')

        r = self.s.post(user_detail)
        d = json.loads(r.text)
        reg_date = d['userInfo']['openDate']
        basic_info['reg_date'] = dateutil.parser.parse(reg_date).strftime(
            '%Y-%m-%d')
        print('获得注册日期')

        r = self.s.post(callerLocationQuery.format(unix_time, self.phone))
        dto = json.loads(r.text)['dto']
        # When province and city carry the same name, store it only once.
        if dto['provinceName'] == dto['cityName']:
            basic_info['phone_location'] = dto['cityName']
        else:
            basic_info['phone_location'] = (dto['provinceName'] +
                                            dto['cityName'])
        print('获得归属地')

        r = self.s.post(account_balance)
        d = json.loads(r.text)
        basic_info['current_balance'] = d['acctbalance']
        print('获得余额')

        r = self.s.post(queryHistoryAccountMonth)
        d = json.loads(r.text)
        total = 0.00
        print('正在获取月消费')
        for dct in d['accountPeriod']:
            r = self.s.post(
                queryHistoryBill.format(unix_time, account_history_url,
                                        dct['queryDate']))
            bill = json.loads(r.text)
            # The amount is read from result.writeofffee when that path
            # exists, otherwise from the top-level payTotal field.
            try:
                spending = bill['result']['writeofffee']
            except (KeyError, TypeError):
                spending = bill['payTotal']
            spending = float(spending)
            total += spending
            basic_info['monthly_consumption'].append(spending)

        # With no billing period there is nothing to average; the ''
        # placeholder is kept rather than dividing by zero.
        if basic_info['monthly_consumption']:
            basic_info['ave_monthly_consumption'] = round(
                total / len(basic_info['monthly_consumption']), 2)
        print('获得月消费')

        dump_json('basic_info.json', basic_info)
예제 #7
0
    def head_info(self):
        """Write ``head_info.json`` with the user type and the report time.

        The report time is the current local time formatted as
        ``YYYY-MM-DD HH:MM:SS`` (sub-second part dropped).
        """
        report_time = datetime.datetime.now().isoformat(sep=' ',
                                                        timespec='seconds')
        dump_json('head_info.json', {
            'user_type': 1,
            'report_time': report_time,
        })
예제 #8
0
    def trip_analysis(self):
        """Summarise activity away from the home location; write ``trip_analysis.json``.

        A call counts as a trip call when the home location read from
        ``basic_info.json`` is not contained in ``call[6]``; a message counts
        when its last field is non-empty and does not contain the home
        location. One record is produced per distinct trip location that has
        at least one call. It carries the totals for that location, the
        months (``YYYY-MM``, newest first) in which calls were made there,
        and one detail record per such month.
        """
        gsd = load_json('basic_info.json')['phone_location']

        # Group trip calls and messages by location. setdefault creates the
        # list and appends in one step, so the first item for a location is
        # stored as well.
        calls_by_trip = {}
        for call in self.call_list:
            if gsd not in call[6]:
                calls_by_trip.setdefault(call[6], []).append(call)

        msg_by_trip = {}
        for msg in self.msg_list:
            if gsd not in msg[-1] and msg[-1] != '':
                msg_by_trip.setdefault(msg[-1], []).append(msg)

        trip_analysis = []
        # Iterating the dict gives locations in first-seen order, which keeps
        # the output order stable between runs.
        for location, calls in calls_by_trip.items():
            msgs = msg_by_trip.get(location, [])

            trip = {
                "call_cnt": 0,
                "call_seconds": 0,
                "called_cnt": 0,
                "called_seconds": 0,
                "detail": [],
                "msg_cnt": 0,
                "receive_cnt": 0,
                "send_cnt": 0,
                "talk_cnt": 0,
                "talk_seconds": 0,
                "unknown_cnt": 0,
                'date_distribution': [],
                'location': location
            }

            for call in calls:
                add_call_detail(trip, call)
            for msg in msgs:
                add_msg_detail(trip, msg)

            trip['date_distribution'] = sorted(
                {call[2][:7] for call in calls}, reverse=True)

            for month in trip['date_distribution']:
                detail = {
                    "call_cnt": 0,
                    "call_seconds": 0,
                    "called_cnt": 0,
                    "called_seconds": 0,
                    "month": month,
                    "msg_cnt": 0,
                    "receive_cnt": 0,
                    "send_cnt": 0,
                    "talk_cnt": 0,
                    "talk_seconds": 0,
                    "unknown_cnt": 0
                }
                # Only this location's items from this month go into the
                # month detail.
                for call in calls:
                    if call[2][:7] == month:
                        add_call_detail(detail, call)
                for msg in msgs:
                    if msg[0][:7] == month:
                        add_msg_detail(detail, msg)
                trip['detail'].append(detail)

            trip_analysis.append(trip)

        dump_json('trip_analysis.json', trip_analysis)
예제 #9
0
    def area_analysis(self):
        """Summarise activity per province; write ``area_analysis.json``.

        A province from ``province_dict`` is reported when its name occurs in
        the location field (``call[7]``) of at least one call. For each such
        province the record holds the totals of all matching calls and
        messages (matched on the message's last field) plus one detail record
        per month (``YYYY-MM``, newest first) in which a matching call was
        made. Records are sorted by ``talk_seconds``, highest first.
        """
        # Provinces are kept in province_dict order so that ties in the final
        # sort come out in a stable order.
        provinces = [
            prov for prov in province_dict
            if any(prov in call[7] for call in self.call_list)
        ]

        area_analysis = []
        for prov in provinces:
            area = {
                "call_cnt": 0,
                "call_seconds": 0,
                "called_cnt": 0,
                "called_seconds": 0,
                "detail": [],
                "msg_cnt": 0,
                "receive_cnt": 0,
                "send_cnt": 0,
                "talk_cnt": 0,
                "talk_seconds": 0,
                "unknown_cnt": 0,
                'area': prov
            }

            area_calls = [call for call in self.call_list if prov in call[7]]
            area_msgs = [msg for msg in self.msg_list if prov in msg[-1]]

            for call in area_calls:
                add_call_detail(area, call)
            for msg in area_msgs:
                add_msg_detail(area, msg)

            months = sorted({call[2][:7] for call in area_calls},
                            reverse=True)
            for month in months:
                detail = {
                    "call_cnt": 0,
                    "call_seconds": 0,
                    "called_cnt": 0,
                    "called_seconds": 0,
                    "month": month,
                    "msg_cnt": 0,
                    "receive_cnt": 0,
                    "send_cnt": 0,
                    "talk_cnt": 0,
                    "talk_seconds": 0,
                    "unknown_cnt": 0
                }
                for call in area_calls:
                    if call[2][:7] == month:
                        add_call_detail(detail, call)
                # Monthly messages belong to the month detail; the area
                # totals were already updated above.
                for msg in area_msgs:
                    if msg[0][:7] == month:
                        add_msg_detail(detail, msg)
                area['detail'].append(detail)

            area_analysis.append(area)

        area_analysis.sort(key=operator.itemgetter('talk_seconds'),
                           reverse=True)
        dump_json('area_analysis.json', area_analysis)
예제 #10
0
    def call_log(self):
        """Build one contact record per number; write ``call_log.json``.

        For every number in ``self.phone_list`` the record aggregates that
        number's calls (``call[5]``) and messages (``msg[3]``):

        * totals, via ``add_call_detail`` / ``add_msg_detail``;
        * time-of-day buckets from the call hour;
        * weekday / weekend counts;
        * first and last contact timestamps;
        * calls within 7 / 30 / 90 days of the most recent call with that
          number;
        * one detail record per month (``YYYY-MM``, newest first) in which
          the number was called.

        Records are sorted by ``talk_seconds``, highest first.
        """
        # Group calls and messages by peer number once, so each record only
        # ever sees its own items.
        calls_by_number = {}
        for call in self.call_list:
            calls_by_number.setdefault(call[5], []).append(call)

        msgs_by_number = {}
        for msg in self.msg_list:
            msgs_by_number.setdefault(msg[3], []).append(msg)

        call_log = []
        for number in self.phone_list:
            calls = calls_by_number.get(number, [])
            msgs = msgs_by_number.get(number, [])

            log = {
                "call_cnt": 0,
                "call_seconds": 0,
                "called_cnt": 0,
                "called_seconds": 0,
                "contact_1m": 0,
                "contact_1w": 0,
                "contact_3m": 0,
                "contact_afternoon": 0,
                "contact_early_morning": 0,
                "contact_eveing": 0,
                "contact_morning": 0,
                "contact_night": 0,
                "contact_noon": 0,
                "contact_weekday": 0,
                "contact_weekend": 0,
                "detail": [],
                "first_contact_date": "",
                "last_contact_date": "",
                "msg_cnt": 0,
                "phone": number,
                "phone_info": "未知",
                "phone_label": "未知",
                "phone_location": "",
                "receive_cnt": 0,
                "send_cnt": 0,
                "talk_cnt": 0,
                "talk_seconds": 0,
                "unknown_cnt": 0
            }

            call_days = []  # ordinal day of every call with this number
            for call in calls:
                add_call_detail(log, call)
                log['phone_location'] = call[7]

                hour = int(call[2][11:13])
                if time_in_range(23, 5, hour):
                    log['contact_early_morning'] += 1
                elif time_in_range(5, 9, hour):
                    log['contact_morning'] += 1
                elif time_in_range(9, 12, hour):
                    log['contact_noon'] += 1
                elif time_in_range(12, 18, hour):
                    log['contact_afternoon'] += 1
                elif time_in_range(18, 21, hour):
                    log['contact_eveing'] += 1
                elif time_in_range(21, 23, hour):
                    log['contact_night'] += 1

                # call[2][:-9] drops the trailing ' HH:MM:SS' part.
                day = dateutil.parser.parse(call[2][:-9])
                call_days.append(day.toordinal())
                # isoweekday() runs 1 (Monday) .. 7 (Sunday).
                if day.isoweekday() <= 5:
                    log['contact_weekday'] += 1
                else:
                    log['contact_weekend'] += 1

            for msg in msgs:
                add_msg_detail(log, msg)

            # A number without any call keeps the empty date strings and
            # zero window counters.
            if calls:
                # min/max on the fixed-width timestamp strings, so the result
                # does not depend on the order of self.call_list.
                timestamps = [call[2] for call in calls]
                log['first_contact_date'] = min(timestamps)
                log['last_contact_date'] = max(timestamps)

                latest = max(call_days)
                for day in call_days:
                    if day >= latest - 7:
                        log['contact_1w'] += 1
                    if day >= latest - 30:
                        log['contact_1m'] += 1
                    if day >= latest - 90:
                        log['contact_3m'] += 1

            # add month detail
            months = sorted({call[2][:7] for call in calls}, reverse=True)
            for month in months:
                detail = {
                    "call_cnt": 0,
                    "call_seconds": 0,
                    "called_cnt": 0,
                    "called_seconds": 0,
                    "month": month,
                    "msg_cnt": 0,
                    "receive_cnt": 0,
                    "send_cnt": 0,
                    "talk_cnt": 0,
                    "talk_seconds": 0,
                    "unknown_cnt": 0
                }
                for call in calls:
                    if call[2][:7] == month:
                        add_call_detail(detail, call)
                # Monthly messages go into the month detail; the record
                # totals were already updated above.
                for msg in msgs:
                    if msg[0][:7] == month:
                        add_msg_detail(detail, msg)
                log['detail'].append(detail)

            call_log.append(log)

        call_log.sort(key=operator.itemgetter('talk_seconds'), reverse=True)
        dump_json('call_log.json', call_log)
예제 #11
0
    def user_portrait(self):
        """Derive usage-pattern figures; write ``user_portrait.json``.

        Computes from ``self.call_list``, ``self.msg_list``,
        ``self.month_list`` and ``self.phone_list``:

        * ``content_distribution`` -- the most frequent call location and
          its share of all calls in percent;
        * ``active_days`` -- first and last call day, the days in the covered
          months without any call, and every run of three consecutive such
          days;
        * ``night_activity_ratio`` / ``night_msg_ratio`` -- percentage of
          calls / messages with an hour >= 23 or <= 5;
        * ``both_call_cnt`` -- numbers that were both called and calling.

        Empty call or message lists leave the corresponding fields at their
        initial values instead of raising.
        """
        user_portrait = {
            'active_days': {
                'end_day': '',
                'start_day': '',
                'stop_3_days': 0,
                'stop_3_days_detail': [],
                'stop_days': 0,
                'stop_days_detail': [],
                'total_days': 0
            },
            'both_call_cnt': 0,
            'content_distribution': {},
            'night_activity_ratio': 0.00,
            'night_msg_ratio': 0
        }
        active_days = user_portrait['active_days']

        # call[2][:-9] drops the trailing ' HH:MM:SS', leaving the date.
        date_list = sorted(call[2][:-9] for call in self.call_list)
        call_hour_list = [int(call[2][11:13]) for call in self.call_list]
        msg_hour_list = [int(msg[0][11:13]) for msg in self.msg_list]
        location_list = [call[7] for call in self.call_list]

        if self.call_list:
            location, hits = collections.Counter(location_list).most_common(
                1)[0]
            user_portrait['content_distribution'] = {
                'location': location,
                'ratio': round(hits * 100 / len(self.call_list), 2)
            }

            active_days['end_day'] = date_list[-1]
            active_days['start_day'] = date_list[0]
            active_days['total_days'] = dateutil.parser.parse(
                date_list[-1]).toordinal() - dateutil.parser.parse(
                    date_list[0]).toordinal()

        # Every calendar day of every covered month.
        total_date_list = []
        for month in self.month_list:
            num_days = calendar.monthrange(month.year, month.month)[1]
            total_date_list.extend([
                datetime.date(month.year, month.month,
                              day).strftime('%Y-%m-%d')
                for day in range(1, num_days + 1)
            ])

        stop_days_detail = sorted(set(total_date_list) - set(date_list))
        active_days['stop_days_detail'] = stop_days_detail
        active_days['stop_days'] = len(stop_days_detail)

        # A day starts a three-day gap when it and BOTH following days are
        # call-free; each membership has to be tested on its own.
        stop_date_ints = set(gen_date_int(stop_days_detail))
        stop_3_starts = sorted(
            date_int for date_int in stop_date_ints
            if date_int + 1 in stop_date_ints
            and date_int + 2 in stop_date_ints)
        active_days['stop_3_days'] = len(stop_3_starts)
        for date_int in stop_3_starts:
            day = datetime.date.fromordinal(date_int)
            nextday = datetime.date.fromordinal(date_int + 2)
            active_days['stop_3_days_detail'].append(
                '{} - {}'.format(day, nextday))

        def night_percentage(hours):
            """Return the share of night-time hours in percent (0.0 if empty)."""
            if not hours:
                return 0.0
            night = sum(1 for hour in hours if hour >= 23 or hour <= 5)
            return round(night * 100 / len(hours), 2)

        user_portrait['night_activity_ratio'] = night_percentage(
            call_hour_list)
        user_portrait['night_msg_ratio'] = night_percentage(msg_hour_list)

        # Numbers seen in both directions, restricted to the known phone
        # list (deduplicated).
        outgoing = {item[5] for item in self.call_list if item[4] == '主叫'}
        incoming = {item[5] for item in self.call_list if item[4] == '被叫'}
        user_portrait['both_call_cnt'] = sum(
            1 for number in set(self.phone_list)
            if number in outgoing and number in incoming)

        dump_json('user_portrait.json', user_portrait)