Example #1
0
def main(user):
    """Load one user's Bluetooth scans and aggregate them into 90-minute bins.

    Processing steps:

    1. Load the user's Bluetooth data via ``loaders.loadUserBluetooth``.
    2. Drop every row whose ``bt_mac`` has a count above 30 in the first
       value returned by ``filterUserMac``.
    3. Keep only rows whose timestamp is on a Monday, Tuesday, Wednesday,
       Thursday or Sunday, and whose hour is strictly greater than
       ``evening_hour`` (18) or strictly less than ``morning_hour`` (7).
    4. Fill missing ``scanned_user`` values with the row's ``bt_mac``.
    5. Group by ``user``, resample into left-closed 90-minute bins and
       aggregate ``scanned_user`` with ``concatenater``.
    6. Turn empty-set aggregates into NaN so they count as missing.

    Args:
        user: Identifier passed on to the loaders and used in log output.

    Returns:
        A ``(grouped, var)`` tuple, where ``var`` is the second value
        returned by ``filterUserMac`` (passed through unchanged), or
        ``None`` when the user has no Bluetooth data or when any exception
        occurs during processing (the full traceback is written to stderr).
    """
    # ****************************************************************************
    # *                         Initial filtering of data                        *
    # ****************************************************************************
    try:
        print(f"Processing user {user}")
        ua = loaders.Useralias()  # noqa
        morning_hour = 7
        evening_hour = 18
        df = loaders.loadUserBluetooth(user, ua)
        if df is None:
            return None  # df is None because the user has no bluetooth data
        cnt, var = filterUserMac(df, user, ua.userdct)
        remove_from_index = set(cnt[cnt > 30].index)
        df = df[~df.bt_mac.isin(remove_from_index)]

        # ****************************************************************************
        # *           Filter data to contain only free time before workdays          *
        # ****************************************************************************
        # Is it Monday, Tuesday, Wednesday, Thursday or Sunday?
        before_workday = df.index.weekday.isin({0, 1, 2, 3, 6})
        free_time = (evening_hour < df.index.hour) | (df.index.hour < morning_hour)
        # Copy so the column assignment below writes to an independent frame
        # instead of a slice of ``df`` (avoids chained-assignment problems).
        dfs = df[before_workday & free_time].copy()

        # Fall back to the raw MAC address where no scanned user is recorded.
        dfs['scanned_user'] = dfs['scanned_user'].fillna(dfs['bt_mac'])

        grouped = dfs.groupby('user')[['scanned_user']].resample('90min', closed='left').agg(concatenater)
        # ``replace(set(), ...)`` would treat the empty set as an empty list of
        # values to match and replace nothing, so map empty sets explicitly.
        grouped['scanned_user'] = grouped['scanned_user'].map(
            lambda seen: np.nan if isinstance(seen, (set, frozenset)) and not seen else seen
        )

        non_null_count = grouped.scanned_user.notnull().sum()
        print(user, "fraction of non-nulls:", non_null_count / grouped.shape[0])
        print(user, "number of of non-nulls:", non_null_count)
        return (grouped, var)
    except Exception:
        # Best-effort processing: report the failure with its traceback and
        # signal it to the caller with None rather than aborting the whole run.
        import traceback

        print(f"An Exception was raised when processing the user {user}:", file=sys.stderr)
        traceback.print_exc(file=sys.stderr)
        return None
Example #2
0
#               (0.220, 0.424, 0.690),
#               (0.749, 0.357, 0.090),
#               (1.000, 1.000, 0.600),
#               (0.941, 0.008, 0.498),
#               (0.400, 0.400, 0.400)]
# sns.set_palette(colorcycle)
# mpl.rcParams['figure.max_open_warning'] = 65
# mpl.rcParams['figure.figsize'] = [12, 7]

from speclib import misc, loaders

# Console display limits for pandas objects.
pd.options.display.max_rows = 55
pd.options.display.max_columns = 10
pd.options.display.width = 1000

# Alias lookup object and the list of users to process.
ua = loaders.Useralias()
userlist = loaders.getUserList()

# Build a graph with one node per user, keyed by the value ``ua`` maps each
# entry of ``userlist`` to.
nicenames = [ua[userhash] for userhash in userlist]
g = nx.Graph()
g.add_nodes_from(nicenames)

# For every entry in ``userlist``: load that user's Bluetooth data, clean it,
# and count how often each ``scanned_user`` value occurs.
# NOTE(review): the loop body appears to continue beyond this excerpt --
# ``user`` and ``df_cnt`` are assigned but not used in the visible lines.
for i, userhash in enumerate(userlist):
    # Progress output; ``i`` is zero-based, so the first user prints as 0/N.
    print(f"Processing {ua[userhash]} {i}/{len(userlist)}")
    user = ua[userhash]  # presumably the readable alias for this hash -- confirm against Useralias
    df = loaders.loadUserBluetooth(userhash, ua)
    if df is None:
        continue  # Skip users for whom the loader returned no data
    df = df.dropna()  # discard rows containing any missing value
    df = df[df.user != df.scanned_user]  # drop rows where a user registered themselves
    df_cnt = df.scanned_user.value_counts()  # number of rows per scanned_user value
Example #3
0
              (0.749, 0.357, 0.090),
              (1.000, 1.000, 0.600),
              (0.941, 0.008, 0.498),
              (0.400, 0.400, 0.400)]
# Use the custom colour cycle for seaborn, then apply the matplotlib defaults
# (figure-count warning threshold, figure size, no LaTeX text rendering).
sns.set_palette(colorcycle)
mpl.rcParams.update({
    'figure.max_open_warning': 65,
    'figure.figsize': [12, 7],
    'text.usetex': False,
})

from speclib import misc, plotting, loaders

import missingno as msno


# Load the JSON data set and visualise where values are missing.
df = pd.read_json('../../allan_data/RGender_.json')
userAlias = loaders.Useralias()  # not used in the visible lines

q = misc.QuestionCompleter(df)  # not used in the visible lines
f = misc.QuestionFilterer(df)

# NOTE(review): at module level ``f.__answer`` is NOT name-mangled, so it looks
# up an attribute literally called ``__answer``. If QuestionFilterer assigns
# ``self.__answer`` inside its class body, the stored name is
# ``_QuestionFilterer__answer`` and every access below raises AttributeError.
# Confirm the attribute name against speclib.misc.
msno.matrix(f.__answer)

# Boolean mask of missing cells.
dna = f.__answer.isna()
# Missing-value count per column, largest first.
dnas = dna.sum(axis=0).sort_values(ascending=False)
# Bar plot restricted to the columns with an above-average number of missing values.
msno.bar(f.__answer.loc[:, dnas > dnas.mean()])
plt.show()
# Missing-value count per row; not used in the visible lines.
dnas = dna.sum(axis=1)
msno.dendrogram(f.__answer)
plt.show()