Ejemplo n.º 1
0
def calculate_features(info_path, date, user):
    """
    Build the single-row sleep feature table and save it as sleep.csv.

    The '睡眠信息' sheet of the workbook is scanned for rows whose column 1
    equals ``user`` and whose column 7 equals ``date``. When several rows
    match, the last one wins; when none match, a row of eight zeros is saved.

    With ``a = get_time(column 3)`` and ``b = get_time(column 2)`` the saved
    values are, in order: ``a``, ``b - a``, ``b``, ``a / b``, ``1 - a / b``,
    ``int`` of the first two characters of column 4, ``int`` of the first two
    characters of column 5, and ``(1440 - b) / 1440``.

    :param info_path: workbook path handed to ``bs.read_xlsx``
    :param date: day to select, compared with column 7 of each row
    :param user: user to select, compared with column 1 of each row
    :return: None; the table is written to
        ``bs.get_save_whole_name('sleep.csv')``
    """
    minutes_per_day = 24 * 60
    sheet = bs.read_xlsx(info_path, '睡眠信息')
    res = pd.DataFrame(columns=features)
    # Default row, kept as-is when no sheet row matches the user and day.
    feature_row = [0] * 8
    for label in sheet.index:
        cells = sheet.loc[label].values
        if not (cells[1] == user and cells[7] == date):
            continue
        # NOTE(review): the meaning of columns 2 and 3 is not visible here;
        # the /1440 normalisation suggests get_time yields minutes - confirm.
        first = get_time(cells[3])
        second = get_time(cells[2])
        gap = second - first
        share = first / second
        rest = 1 - share
        day_remainder = (minutes_per_day - second) / minutes_per_day
        feature_row = [
            first,
            gap,
            second,
            share,
            rest,
            int(cells[4][:2]),
            int(cells[5][:2]),
            day_remainder,
        ]

    res.loc[len(res)] = feature_row

    res.to_csv(bs.get_save_whole_name('sleep.csv'))
Ejemplo n.º 2
0
def create_csv():
    """
    Merge the per-source feature CSVs into one all_inone.csv.

    For every name in the module-level ``files`` the CSV at
    ``bs.get_save_whole_name(name + '.csv')`` is loaded, a leftover
    'Unnamed: 0' column is removed, and each remaining column is renamed to
    ``<name>_<column>``. The frames are then joined side by side.

    :return: None; the merged table is written to
        ``bs.get_save_whole_name('all_inone.csv')``
    """
    frames = []
    for name in files:
        frame = pd.read_csv(bs.get_save_whole_name(name + '.csv'))
        if 'Unnamed: 0' in frame.columns:
            frame = frame.drop('Unnamed: 0', axis=1)
        # Prefix every column with its source so names stay unique after
        # the column-wise concat.
        prefixed = {column: name + '_' + column for column in frame.columns.values}
        frames.append(frame.rename(columns=prefixed))
    merged = pd.concat(frames, axis=1)
    merged.to_csv(bs.get_save_whole_name('all_inone.csv'))
def calculaue_features(whole_path, info_path, date, users):
    """
    Compute the location, move and time features and save them as location.csv.

    :param whole_path: path of the CSV that is loaded without a header row
    :param info_path: forwarded to ``get_home_work_place``
    :param date: forwarded to each of the three feature calculators
    :param users: forwarded to ``get_home_work_place``
    :return: None; the combined table is written to
        ``bs.get_save_whole_name('location.csv')``
    """
    places = get_home_work_place(info_path, users)
    records = pd.read_csv(whole_path, header=None)
    # Every calculator receives its own copy of the loaded frame.
    feature_parts = (
        calculate_location_features(places, records.copy(), date),
        calculate_move_features(places, records.copy(), date),
        calculate_time_features(places, records.copy(), date),
    )
    feature_df = modify_list_Todataframe(*feature_parts)
    feature_df.to_csv(bs.get_save_whole_name('location.csv'))
def calculate_features(whole_path, date):
    """
    Aggregate one day of call records into a single feature row (call.csv).

    Only rows whose column 1 text before the first space equals ``date`` are
    used. Column 2 is split on ';': field 2 is added to the duration totals
    and field 1 identifies the other party. Outgoing calls, answered incoming
    calls and rejected incoming calls are totalled separately and together.

    :param whole_path: path of the CSV that is loaded without a header row
    :param date: day to select
    :return: None; the row is written to
        ``bs.get_save_whole_name('call.csv')``
    """
    res = pd.DataFrame(columns=features)
    totals = dict.fromkeys(features, 0)  # every feature starts at 0
    out_peers = set()
    answer_peers = set()
    reject_peers = set()
    all_peers = set()

    calls = pd.read_csv(whole_path, header=None)
    for label in calls.index:  # walk through this user's csv
        row = calls.loc[label].values.tolist()
        if str(row[1]).split(' ')[0] != date:
            continue

        # Pick the duration key and the peer set this record belongs to.
        event = row[0]
        if event == 'OUTGOTING_CALL_TYPE':
            duration_key, peers = 'out_duration', out_peers
        elif event == 'INCOMING_CALL_TYPE':
            detail = str(row[2])
            if 'answer' in detail:
                duration_key, peers = 'answer_duration', answer_peers
            elif 'reject' in detail:
                duration_key, peers = 'reject_duration', reject_peers
            else:
                continue
        else:
            continue

        fields = str(row[2]).split(';')
        totals[duration_key] += int(fields[2])
        totals['all_duration'] += int(fields[2])
        peers.add(fields[1])
        all_peers.add(fields[1])

    # Distinct counterparts seen per category.
    totals['out_people_number'] = len(out_peers)
    totals['answer_people_number'] = len(answer_peers)
    totals['reject_people_number'] = len(reject_peers)
    totals['all_people_number'] = len(all_peers)

    res.loc[len(res)] = list(totals.values())

    res.to_csv(bs.get_save_whole_name('call.csv'))
Ejemplo n.º 5
0
def predict():
    """
    Predict vas from the first row of all_inone.csv.

    The model and scaler come from ``load_model``; the columns listed by
    ``get_features_list`` are selected, scaled and passed to the model.

    :return: the first element of the model's prediction
    """
    model, scaler = load_model()
    table = pd.read_csv(bs.get_save_whole_name('all_inone.csv'))
    if 'Unnamed: 0' in table.columns:
        table = table.drop('Unnamed: 0', axis=1)

    wanted = get_features_list()
    sample = table[wanted].loc[0].values
    # The scaler is given a one-element batch holding the sample.
    scaled = scaler.transform([sample])
    return model.predict(scaled)[0]
Ejemplo n.º 6
0
def calculate_features(whole_path, date):
    """
    Aggregate one day of screen records into a single feature row (screen.csv).

    Only 'SCREEN_ACTIVE_TYPE_2' rows whose column 1 text before the first
    space equals ``date`` are used. Column 2 is added to 'duration' and to the
    three-hour bucket that contains the record's hour; 'count' is incremented
    once per record.

    :param whole_path: path of the CSV that is loaded without a header row
    :param date: day to select
    :return: None; the row is written to
        ``bs.get_save_whole_name('screen.csv')``
    """
    bucket_labels = ('0-3', '3-6', '6-9', '9-12',
                     '12-15', '15-18', '18-21', '21-24')
    # Hour of day -> feature key of its three-hour bucket.
    bucket_of_hour = {hour: bucket_labels[hour // 3] for hour in range(24)}

    res = pd.DataFrame(columns=features)
    totals = dict.fromkeys(features, 0)  # every feature starts at 0
    events = pd.read_csv(whole_path, header=None)
    for label in events.index:  # walk through this user's csv
        row = events.loc[label].values.tolist()
        if str(row[1]).split(' ')[0] != date:
            continue
        if row[0] != 'SCREEN_ACTIVE_TYPE_2':
            continue
        totals['duration'] += int(row[2])
        totals['count'] += 1
        moment = bs.get_whole_time_from_str(row[1])
        bucket = bucket_of_hour.get(moment.hour)
        if bucket is not None:
            totals[bucket] += int(row[2])
    res.loc[len(res)] = list(totals.values())
    res.to_csv(bs.get_save_whole_name('screen.csv'))
def calculate_features(whole_path, date):
    """
    Aggregate one day of app records into a single feature row (app.csv).

    The columns are every ``<big_feature>_<small_feature>`` combination of the
    module-level ``big_features`` and ``small_features``. Only 'OPEN_ACT_2'
    rows whose column 1 text before the first space equals ``date`` are used.
    A row counts towards a big feature when column 2 contains at least one of
    the strings returned by ``get_class`` for it. The last ';'-separated field
    of column 2 is then added to that feature's overall and three-hour-bucket
    durations, and the matching counters are incremented.

    :param whole_path: path of the CSV that is loaded without a header row
    :param date: day to select
    :return: None; the row is written to
        ``bs.get_save_whole_name('app.csv')``
    """
    # (duration key suffix, count key suffix) of each three-hour bucket.
    slot_suffixes = (
        ('0-3_duration', '0-3_count'),
        ('3-6_duration', '3-6_count'),
        ('6-9_duration', '6-9_count'),
        ('9-12_duration', '9-12_count'),
        ('12-15_duration', '12-15_count'),
        ('15-18_duration', '15-18_count'),
        ('18-21_duration', '18-21_count'),
        ('21-24_duration', '21-24_count'),
    )
    slot_of_hour = {hour: slot_suffixes[hour // 3] for hour in range(24)}

    columns = [big + '_' + small
               for big in big_features
               for small in small_features]
    res = pd.DataFrame(columns=columns)
    totals = dict.fromkeys(columns, 0)  # every feature starts at 0
    records = pd.read_csv(whole_path, header=None)
    for label in records.index:  # walk through this user's csv
        row = records.loc[label].values.tolist()
        if str(row[1]).split(' ')[0] != date:
            continue
        if row[0] != 'OPEN_ACT_2':
            continue
        for big_feature in big_features:
            keywords = get_class(big_feature)
            # Every keyword is tested, none is skipped after a first hit.
            hits = [word for word in keywords if word in str(row[2])]
            if not hits:
                continue
            prefix = big_feature + '_'
            totals[prefix + 'duration'] += int(str(row[2]).split(';')[-1])
            totals[prefix + 'count'] += 1
            moment = bs.get_whole_time_from_str(row[1])
            suffixes = slot_of_hour.get(moment.hour)
            if suffixes is None:
                continue
            duration_suffix, count_suffix = suffixes
            totals[prefix + duration_suffix] += int(
                str(row[2]).split(';')[-1])
            totals[prefix + count_suffix] += 1

    res.loc[len(res)] = list(totals.values())
    res.to_csv(bs.get_save_whole_name('app.csv'))