            groupby('id').value_counts(normalize=normalize).rename(name)

        if application:
            applications = [application] if not isinstance(application, list) else application

        return (self.__data__.groupby(['id', 'session']).application.first().isin(applications)). \
            groupby('id').value_counts(normalize=normalize).rename(name)


if __name__ == "__main__":
    ###########
    # EXAMPLE #
    ###########

    hlp.hi()
    hlp.set_param(log_level=3)

    # Read sample data
    data = hlp.add_dates(
        pd.read_parquet(
            path='../../data/glance/processed_appevents/0a0fe3ed-d788-4427-8820-8b7b696a6033_appevents.parquet'),
        'appevents')

    # Data path
    data_path = '../../data/glance/appevents/0a0fe3ed-d788-4427-8820-8b7b696a6033_appevents.parquet'

    # More sample data
    data2 = pd.read_parquet(path='../../data/glance/appevents/0a9edba1-14e3-466a-8d0c-f8a8170cefc8_appevents.parquet')
    data3 = pd.read_parquet(path='../../data/glance/appevents/0a48d1e8-ead2-404a-a5a2-6b05371200b1_appevents.parquet')
    data4 = hlp.add_dates(pd.concat([data, data2, data3], sort=True), 'appevents')
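    # ------------------------------------------------------------------
    # Hedged sketch (added, not in the original module): a self-contained toy
    # illustration of the groupby('id').value_counts(normalize=...) pattern
    # used by the method fragment above. Column names mirror the real data;
    # the values are made up for the example.
    # ------------------------------------------------------------------
    toy = pd.DataFrame({'id': ['u1', 'u1', 'u1', 'u2', 'u2'],
                        'session': [1, 2, 3, 1, 2],
                        'application': ['mail', 'maps', 'mail', 'mail', 'maps']})

    # First application per session, then the per-user share of sessions per app
    first_apps = toy.groupby(['id', 'session']).application.first()
    print(first_apps.groupby('id').value_counts(normalize=True))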
from mobiledna.core.appevents import Appevents
from os.path import join, pardir
from os import listdir
from mobiledna.core import help as hlp
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.preprocessing import normalize, scale
from scipy.stats import pearsonr
import matplotlib.pyplot as plt

if __name__ == '__main__':

    hlp.hi()
    hlp.set_param(log_level=1)
    hlp.set_dir(join(pardir, pardir, 'data', 'glance', 'objects', 'appevents'))

    # Set dirs
    ae_objects_dir = join(pardir, pardir, 'data', 'glance', 'objects', 'appevents')
    ae_dir = join(pardir, pardir, 'data', 'glance', 'appevents')
    ae_processed_dir = join(pardir, pardir, 'data', 'glance', 'processed_appevents')

    # Get file names
    ae_data_files = sorted(listdir(ae_dir))
    big_data = pd.DataFrame()

    # Loop over files and process
    for ae_data_file in tqdm(ae_data_files[:1], desc='Processing appevents'):
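        # Hedged sketch (added; the original loop body was cut off in the
        # source): one plausible body, assuming each file is a per-user
        # appevents parquet to be appended to big_data, reusing the
        # read_parquet/concat calls seen elsewhere in these scripts.
        data = pd.read_parquet(path=join(ae_dir, ae_data_file))
        big_data = pd.concat([big_data, data], sort=True)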
from os.path import join

import pandas as pd
from sklearn.dummy import DummyRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xg
from xgboost import XGBRegressor, plot_importance

from mobiledna.core import help as hlp
from mobiledna.core.appevents import Appevents
import mobiledna.test.mdecline_features as mf

if __name__ == '__main__':

    # Set some parameters
    hlp.hi('M-decline age prediction')
    hlp.set_param(data_dir=join(hlp.DATA_DIR, 'm-decline_pred'), log_level=1)

    # Get data
    ae = Appevents.from_pickle(join(hlp.DATA_DIR, 'ae_full_intersection.npy'))
    age = pd.read_pickle(path=join(hlp.DATA_DIR, 'full_age_vector_intersection.npy'))
    age = age.sort_index()

    # Plot the age distribution (note: distplot is deprecated in seaborn >= 0.11;
    # histplot/displot are its replacements)
    sns.set_palette('Accent')
    sns.set_style('white')
    sns.distplot(age)
    plt.show()

    # age = age[age > 35]
    # selection_ids = list(age.index)
    # ae.filter(users=selection_ids, inplace=True)
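    # ------------------------------------------------------------------
    # Hedged sketch (added, not in the original script): one way the imported
    # regressors could be compared head-to-head. X_demo/y_demo are synthetic
    # stand-ins; the real features come from mobiledna.test.mdecline_features.
    # ------------------------------------------------------------------
    import numpy as np
    from sklearn.model_selection import cross_validate

    rng = np.random.default_rng(0)
    X_demo = rng.normal(size=(200, 5))
    y_demo = X_demo @ rng.normal(size=5) + rng.normal(scale=.1, size=200)

    for label, model in [('dummy', DummyRegressor()),
                         ('svr', SVR()),
                         ('rf', RandomForestRegressor(n_estimators=50)),
                         ('xgb', XGBRegressor(n_estimators=50))]:
        scores = cross_validate(model, X_demo, y_demo, cv=5,
                                scoring='neg_mean_absolute_error')
        print(f"{label}: MAE = {-scores['test_score'].mean():.2f}")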
from os.path import join

import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_validate
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xg

from mobiledna.core import help as hlp
from mobiledna.core.appevents import Appevents

if __name__ == '__main__':

    # Set some parameters
    hlp.hi('Implicit attitude & mobileDNA')
    hlp.set_param(data_dir=join(hlp.DATA_DIR, 'implicit'), log_level=1)

    # Get data
    survey_data = pd.read_csv(join(hlp.DATA_DIR, 'data.csv'), sep='\t')
    log_data = pd.read_csv(join(hlp.DATA_DIR, 'log_data.csv'), sep=';').iloc[:, 1:]

    # Build object
    ae = Appevents(log_data, add_categories=False, add_date_annotation=False, strip=True)
    # ae = Appevents.from_pickle(path=join(hlp.DATA_DIR, 'implicit.ae'))
    del log_data

    # Filter object (only users with over two weeks of logging)
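    # ------------------------------------------------------------------
    # Hedged sketch (added; the filtering step itself was cut off in the
    # source): an illustration of the intended selection on a toy frame,
    # assuming 'id'/'startDate' columns as elsewhere in the mobileDNA data.
    # The real call would likely reuse ae.filter(users=..., inplace=True),
    # as in the commented-out lines of the sibling script.
    # ------------------------------------------------------------------
    toy_log = pd.DataFrame({'id': ['u1'] * 20 + ['u2'] * 5,
                            'startDate': (list(pd.date_range('2021-01-01', periods=20).date) +
                                          list(pd.date_range('2021-01-01', periods=5).date))})
    days_logged = toy_log.groupby('id').startDate.nunique()
    keep_ids = list(days_logged[days_logged > 14].index)  # over two weeks
    print(keep_ids)  # -> ['u1']
    # ae.filter(users=keep_ids, inplace=True)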
    # Get new name (replace the time suffix with 'TOD', i.e. time of day)
    new_col = time_col[:-4] + 'TOD'

    # Process each row
    tqdm.pandas(desc=f"Adding tod <{time_col}>", position=0, leave=True)
    df[new_col] = hours.progress_apply(label_hour)

    return df


if __name__ == '__main__':

    # Let's go
    hlp.hi()
    hlp.set_dir(join(pardir, 'cache'))
    hlp.set_param(log_level=1,
                  data_dir=join(pardir, pardir, 'data', 'glance', 'processed_appevents'),
                  cache_dir=join(pardir, 'cache'))

    # Load the data and gather apps
    log('Collecting app names.', lvl=1)
    appevents_files = listdir(hlp.DATA_DIR)
    apps = {}

    # Load data
    data = hlp.load(path=join(hlp.DATA_DIR, appevents_files[0]), index='appevents')

    # Add apps to the dict (keys stay unique; note that merging overwrites
    # counts for repeated apps rather than summing them)
    app_counts = Counter(list(data.application))
    apps = {**apps, **app_counts}
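    # ------------------------------------------------------------------
    # Hedged sketch (added): label_hour is defined elsewhere in this module;
    # the toy version below only illustrates the tqdm progress_apply pattern
    # used above. Its binning thresholds are assumptions, not the original's.
    # ------------------------------------------------------------------
    def toy_label_hour(hour: int) -> str:
        # Map an hour (0-23) to a coarse time-of-day label (example thresholds)
        if 6 <= hour < 12:
            return 'morning'
        if 12 <= hour < 18:
            return 'afternoon'
        if 18 <= hour < 24:
            return 'evening'
        return 'night'

    tqdm.pandas(desc='Adding tod <toy>', position=0, leave=True)
    toy_hours = pd.Series([3, 9, 14, 21])
    print(toy_hours.progress_apply(toy_label_hour))  # night, morning, afternoon, evening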