def main(user):
    """Bin the devices ``user`` scanned during free time before workdays.

    Loads the user's Bluetooth data, drops every MAC address whose count from
    ``filterUserMac`` exceeds 30, keeps only observations made late in the
    evening or early in the morning on Sunday-Thursday, and aggregates
    ``scanned_user`` per ``user`` into 90-minute bins with ``concatenater``.

    Parameters
    ----------
    user
        User identifier; passed on to ``loaders.loadUserBluetooth`` and
        ``filterUserMac``.

    Returns
    -------
    tuple or None
        ``(grouped, var)`` where ``grouped`` is the resampled frame and
        ``var`` is the second value returned by ``filterUserMac``.
        ``None`` if the user has no Bluetooth data, or if any exception was
        raised while processing (the error is reported on stderr instead of
        being propagated, so one bad user does not abort a batch run).
    """
    import traceback  # local import: only needed on the error path

    morning_hour = 7
    evening_hour = 18

    try:
        # --- Initial filtering of data ---------------------------------
        print(f"Processing user {user}")
        ua = loaders.Useralias()  # noqa
        df = loaders.loadUserBluetooth(user, ua)
        if df is None:
            return None  # the user has no Bluetooth data
        cnt, var = filterUserMac(df, user, ua.userdct)
        remove_from_index = set(cnt[cnt > 30].index)
        df = df[~df.bt_mac.isin(remove_from_index)]

        # --- Keep only free time before workdays -----------------------
        # weekday(): Monday == 0 ... Sunday == 6, so this selects Monday,
        # Tuesday, Wednesday, Thursday and Sunday.
        before_workday = df.index.weekday.isin({0, 1, 2, 3, 6})
        free_time = (evening_hour < df.index.hour) | (df.index.hour < morning_hour)

        # Work on an explicit copy: assigning a column on a filtered slice
        # triggers SettingWithCopyWarning and may not write where expected.
        dfs = df[before_workday & free_time].copy()

        # Fall back to the raw MAC address where no user was resolved.
        # ``fillna`` with the row-aligned ``bt_mac`` column does this
        # element-wise; ``Series.replace(nan, <Series>)`` does not.
        dfs['scanned_user'] = dfs['scanned_user'].fillna(dfs['bt_mac'])

        # '90min' is the non-deprecated spelling of the old '90T' alias.
        grouped = (
            dfs.groupby('user')[['scanned_user']]
            .resample('90min', closed='left')
            .agg(concatenater)
        )

        # Mark empty bins as missing.  ``replace(set(), nan)`` treats the
        # empty set as an empty *list of values to replace* and is a no-op,
        # so the empty-set cells are mapped explicitly.
        grouped['scanned_user'] = grouped['scanned_user'].map(
            lambda cell: np.nan
            if isinstance(cell, (set, frozenset)) and not cell
            else cell
        )

        non_null = grouped['scanned_user'].notnull().sum()
        print(user, "fraction of non-nulls:", non_null / grouped.shape[0])
        print(user, "number of of non-nulls:", non_null)
        return (grouped, var)
    except Exception:
        # Per-user boundary: report the failure with its full traceback and
        # signal it to the caller with None rather than raising.
        print(f"An Exception was raised when processing the user {user}:", file=sys.stderr)
        traceback.print_exc(file=sys.stderr)
        return None
# (0.220, 0.424, 0.690), # (0.749, 0.357, 0.090), # (1.000, 1.000, 0.600), # (0.941, 0.008, 0.498), # (0.400, 0.400, 0.400)] # sns.set_palette(colorcycle) # mpl.rcParams['figure.max_open_warning'] = 65 # mpl.rcParams['figure.figsize'] = [12, 7] from speclib import misc, loaders pd.set_option('display.max_rows', 55) pd.set_option('display.max_columns', 10) pd.set_option('display.width', 1000) ua = loaders.Useralias() userlist = loaders.getUserList() g = nx.Graph() nicenames = [ua[user] for user in userlist] g.add_nodes_from(nicenames) for i, userhash in enumerate(userlist): print(f"Processing {ua[userhash]} {i}/{len(userlist)}") user = ua[userhash] df = loaders.loadUserBluetooth(userhash, ua) if df is None: continue # Don't process na-users df = df.dropna() df = df[df.user != df.scanned_user] # drop users registering them selves df_cnt = df.scanned_user.value_counts()
(0.749, 0.357, 0.090),
(1.000, 1.000, 0.600),
(0.941, 0.008, 0.498),
(0.400, 0.400, 0.400)]
# The tuples above close a list whose opening lies above this excerpt;
# presumably it is the `colorcycle` palette used on the next line.
sns.set_palette(colorcycle)
mpl.rcParams['figure.max_open_warning'] = 65
mpl.rcParams['figure.figsize'] = [12, 7]
mpl.rcParams['text.usetex'] = False

from speclib import misc, plotting, loaders
import missingno as msno

df = pd.read_json('../../allan_data/RGender_.json')
userAlias = loaders.Useralias()
q = misc.QuestionCompleter(df)
f = misc.QuestionFilterer(df)

# NOTE(review): outside a class body Python does not name-mangle `__answer`,
# so `f.__answer` looks up an attribute literally called `__answer`.  If
# QuestionFilterer stores it as `self.__answer` inside its class, the real
# attribute name is `_QuestionFilterer__answer` and these lookups raise
# AttributeError - confirm against speclib.misc.
msno.matrix(f.__answer)

# Element-wise missing-value mask of the answers, then the number of missing
# entries per column, largest first.
dna = f.__answer.isna()
dnas = dna.sum(axis=0).sort_values(ascending=False)

# Bar chart restricted to the columns with more missing entries than the mean.
msno.bar(f.__answer.loc[:, dnas > dnas.mean()])
plt.show()

# Number of missing entries per row; not read again in the visible code.
dnas = dna.sum(axis=1)
msno.dendrogram(f.__answer)
plt.show()