return (self.__data__.groupby(['id', 'session']).category.first().isin(categories)). \ groupby('id').value_counts(normalize=normalize).rename(name) if application: applications = [application] if not isinstance(application, list) else application return (self.__data__.groupby(['id', 'session']).application.first().isin(applications)). \ groupby('id').value_counts(normalize=normalize).rename(name) if __name__ == "__main__": ########### # EXAMPLE # ########### hlp.hi() hlp.set_param(log_level=3) # Read sample data data = hlp.add_dates( pd.read_parquet( path='../../data/glance/processed_appevents/0a0fe3ed-d788-4427-8820-8b7b696a6033_appevents.parquet'), 'appevents') # Data path data_path = '../../data/glance/appevents/0a0fe3ed-d788-4427-8820-8b7b696a6033_appevents.parquet' # More sample data data2 = pd.read_parquet(path='../../data/glance/appevents/0a9edba1-14e3-466a-8d0c-f8a8170cefc8_appevents.parquet') data3 = pd.read_parquet(path='../../data/glance/appevents/0a48d1e8-ead2-404a-a5a2-6b05371200b1_appevents.parquet') data4 = hlp.add_dates(pd.concat([data, data2, data3], sort=True), 'appevents')
import pandas as pd
from os.path import join
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_validate
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xg

if __name__ == '__main__':
    # Script entry point: load the survey and log CSV files and build an
    # Appevents object from the log data.
    # NOTE(review): `hlp` and `Appevents` are not imported in the visible part
    # of this file -- confirm they are imported above this chunk.

    # Set some parameters
    hlp.hi('Implicit attitude & mobileDNA')
    # NOTE(review): presumably set_param(data_dir=...) updates hlp.DATA_DIR so
    # the joins below resolve inside the 'implicit' sub-directory -- confirm.
    hlp.set_param(data_dir=join(hlp.DATA_DIR, 'implicit'), log_level=1)

    # Get data
    survey_data = pd.read_csv(join(hlp.DATA_DIR, 'data.csv'), sep='\t')
    # The log file is ';'-separated; its first column is dropped
    # (presumably a saved index -- verify against the file).
    log_data = pd.read_csv(join(hlp.DATA_DIR, 'log_data.csv'), sep=';').iloc[:, 1:]

    # Build object
    ae = Appevents(log_data, add_categories=False, add_date_annotation=False, strip=True)
    # Disabled alternative: load a previously pickled object instead.
    # ae = Appevents.from_pickle(path=join(hlp.DATA_DIR, 'implicit.ae'))

    # Drop this script's reference to the raw frame now that `ae` is built.
    del log_data
import numpy as np
import mobiledna.core.help as hlp
import pandas as pd
from os.path import join
from mobiledna.core.appevents import Appevents
from mobiledna.core.notifications import Notifications
from tqdm import tqdm
import mobiledna.test.mdecline_features as mf

if __name__ == '__main__':
    # Script entry point: load appevents and notifications, annotate and strip
    # the appevents, then start collecting features.

    # Set some parameters
    hlp.hi('Data merge')
    # Remember the data directory as it is before set_param is called.
    orig_data_dir = hlp.DATA_DIR
    hlp.set_param(log_level=3)

    # Both inputs are ';'-separated CSV files.
    # NOTE(review): Appevents is read with `load_data` while Notifications is
    # read with `load` -- confirm both constructors exist under these names.
    ae = Appevents.load_data(
        join(hlp.DATA_DIR, 'mdecline_newest/m-decline_newest_appevents.csv'),
        sep=';')
    nf = Notifications.load(
        join(hlp.DATA_DIR, 'mdecline_newest/m-decline_newest_notifications.csv'),
        sep=';')

    # Annotate (already scraped so set to False)
    ae.add_category(scrape=False)
    ae.add_time_of_day()
    ae.add_date_type()
    ae.strip(number_of_days=28, min_log_days=5)

    # BUILD FEATURES
    feature_list = []
    # Index labels of the first 30 entries returned by get_applications()
    # (presumably ordered by usage -- confirm in Appevents).
    apps = ae.get_applications()[:30].index.tolist()
from sklearn.model_selection import cross_validate, cross_val_predict, train_test_split, cross_val_score
from sklearn.dummy import DummyRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xg
from xgboost import XGBRegressor, plot_importance
import mobiledna.test.mdecline_features as mf

if __name__ == '__main__':
    # Script entry point: load the pickled Appevents object and the age vector,
    # then plot the age distribution.
    # NOTE(review): `hlp`, `join`, `pd` and `Appevents` are not imported in the
    # visible part of this file -- confirm they are imported above this chunk.

    # Set some parameters
    hlp.hi('M-decline age prediction')
    hlp.set_param(data_dir=join(hlp.DATA_DIR, 'm-decline_pred'), log_level=1)

    # Get data
    # Both files carry a '.npy' suffix but are read as pickles here.
    ae = Appevents.from_pickle(join(hlp.DATA_DIR, 'ae_full_intersection.npy'))
    # The path is passed positionally: pandas 1.0 renamed this keyword from
    # `path` to `filepath_or_buffer`, so `path=` fails on current releases.
    age = pd.read_pickle(
        join(hlp.DATA_DIR, 'full_age_vector_intersection.npy'))
    age = age.sort_index()

    # Plot the age distribution.
    sns.set_palette('Accent')
    sns.set_style('white')
    # NOTE(review): distplot is deprecated since seaborn 0.11; histplot/displot
    # are the documented replacements when the seaborn pin allows it.
    sns.distplot(age)
    plt.show()

    # Disabled: restrict the analysis to participants older than 35.
    # age = age[age>35]
    # selection_ids = list(age.index)
    # ae.filter(users=selection_ids,inplace=True)
# m-decline grid search
import mobiledna.core.help as hlp
from os.path import join
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_validate
from xgboost import XGBRegressor, XGBRFRegressor, DMatrix, plot_importance, train, XGBClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, StratifiedKFold
import matplotlib.pyplot as plt
import seaborn as sns

if __name__ == '__main__':
    # Script entry point: load the age vector and the feature matrix, bin the
    # ages into classes and create the classifier.

    # Set some parameters
    hlp.hi('M-decline grid search')
    hlp.set_param(data_dir=join(hlp.DATA_DIR, 'm-decline_pred'), log_level=1)

    # Get data
    # The path is passed positionally: pandas 1.0 renamed this keyword from
    # `path` to `filepath_or_buffer`, so `path=` fails on current releases.
    age = pd.read_pickle(join(hlp.DATA_DIR, 'full_age_vector_intersection.npy'))
    age = age.sort_index()
    features = pd.read_csv(join(hlp.DATA_DIR, 'feature_matrix.csv')).set_index('id').sort_index()

    # NOTE(review): X and y are both sorted by index but never aligned
    # explicitly -- presumably both hold exactly the same ids; confirm.
    X = features.values
    y = age

    # Bin ages with edges 0 / 28 / 40 / 100: np.digitize returns 1 for
    # [0, 28), 2 for [28, 40), 3 for [40, 100) and 4 for 100 or older.
    bins = np.array([0, 28, 40, 100])
    inds = np.digitize(age, bins)

    # Re-encode the bin numbers as consecutive class labels starting at 0.
    label_encoder = LabelEncoder()
    y_new = label_encoder.fit_transform(inds)

    xgb = XGBClassifier()