Ejemplo n.º 1
0
            return (self.__data__.groupby(['id', 'session']).category.first().isin(categories)). \
                groupby('id').value_counts(normalize=normalize).rename(name)

        if application:
            applications = [application] if not isinstance(application, list) else application

            return (self.__data__.groupby(['id', 'session']).application.first().isin(applications)). \
                groupby('id').value_counts(normalize=normalize).rename(name)


if __name__ == "__main__":
    # ------------------------------------------------------------------
    # Example: load a handful of sample appevents files and combine them
    # ------------------------------------------------------------------

    hlp.hi()
    hlp.set_param(log_level=3)

    # Processed appevents of the first sample user, passed through
    # hlp.add_dates with the 'appevents' tag.
    data = hlp.add_dates(
        pd.read_parquet(
            path='../../data/glance/processed_appevents/0a0fe3ed-d788-4427-8820-8b7b696a6033_appevents.parquet',
        ),
        'appevents',
    )

    # Location of the unprocessed appevents file of that same user
    data_path = '../../data/glance/appevents/0a0fe3ed-d788-4427-8820-8b7b696a6033_appevents.parquet'

    # Unprocessed appevents of two further sample users (read in this order)
    data2, data3 = [
        pd.read_parquet(path=parquet_file)
        for parquet_file in (
            '../../data/glance/appevents/0a9edba1-14e3-466a-8d0c-f8a8170cefc8_appevents.parquet',
            '../../data/glance/appevents/0a48d1e8-ead2-404a-a5a2-6b05371200b1_appevents.parquet',
        )
    ]

    # Stack the three frames (columns sorted by pd.concat) and run the
    # result through hlp.add_dates once more.
    data4 = hlp.add_dates(
        pd.concat([data, data2, data3], sort=True),
        'appevents',
    )
Ejemplo n.º 2
0
import pandas as pd
from os.path import join
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_validate
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xg

if __name__ == '__main__':

    # Announce the run and configure the helper module
    hlp.hi('Implicit attitude & mobileDNA')
    hlp.set_param(data_dir=join(hlp.DATA_DIR, 'implicit'), log_level=1)

    # Survey data: tab-separated file
    survey_data = pd.read_csv(join(hlp.DATA_DIR, 'data.csv'), sep='\t')

    # Log data: semicolon-separated file; the first column is dropped
    log_data = pd.read_csv(join(hlp.DATA_DIR, 'log_data.csv'), sep=';')
    log_data = log_data.iloc[:, 1:]

    # Build the Appevents object from the log data
    appevents_options = dict(add_categories=False,
                             add_date_annotation=False,
                             strip=True)
    ae = Appevents(log_data, **appevents_options)
    # Alternative: restore a previously pickled object instead of rebuilding it
    #ae = Appevents.from_pickle(path=join(hlp.DATA_DIR, 'implicit.ae'))

    # The raw frame is no longer needed once it has been handed to Appevents
    del log_data
Ejemplo n.º 3
0
import numpy as np
import mobiledna.core.help as hlp
import pandas as pd
from os.path import join
from mobiledna.core.appevents import Appevents
from mobiledna.core.notifications import Notifications
from tqdm import tqdm
import mobiledna.test.mdecline_features as mf

if __name__ == '__main__':
    # Announce the run; remember the data directory before touching parameters
    hlp.hi('Data merge')
    orig_data_dir = hlp.DATA_DIR
    hlp.set_param(log_level=3)

    # Load the semicolon-separated appevents and notifications exports.
    # NOTE(review): Appevents is loaded via `load_data` while Notifications
    # uses `load` -- confirm both constructors exist under these names.
    appevents_csv = join(hlp.DATA_DIR,
                         'mdecline_newest/m-decline_newest_appevents.csv')
    ae = Appevents.load_data(appevents_csv, sep=';')

    notifications_csv = join(hlp.DATA_DIR,
                             'mdecline_newest/m-decline_newest_notifications.csv')
    nf = Notifications.load(notifications_csv, sep=';')

    # Annotate the appevents (categories were scraped earlier, hence scrape=False)
    ae.add_category(scrape=False)
    ae.add_time_of_day()
    ae.add_date_type()
    ae.strip(number_of_days=28, min_log_days=5)

    # BUILD FEATURES
    feature_list = []

    # Index labels of the first 30 rows returned by get_applications()
    top_apps = ae.get_applications()[:30]
    apps = top_apps.index.tolist()
Ejemplo n.º 4
0
from sklearn.model_selection import cross_validate, cross_val_predict, train_test_split, cross_val_score
from sklearn.dummy import DummyRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xg
from xgboost import XGBRegressor, plot_importance
import mobiledna.test.mdecline_features as mf

if __name__ == '__main__':

    # Announce the run and point the helper module at the prediction data
    hlp.hi('M-decline age prediction')
    hlp.set_param(data_dir=join(hlp.DATA_DIR, 'm-decline_pred'), log_level=1)

    # Get data
    # NOTE: both inputs are read through pickle-based loaders (despite the
    # .npy suffix). Unpickling can execute arbitrary code, so only load files
    # from a trusted source.
    ae = Appevents.from_pickle(join(hlp.DATA_DIR, 'ae_full_intersection.npy'))

    # The path is passed positionally on purpose: the first parameter of
    # pd.read_pickle was renamed from `path` to `filepath_or_buffer` in
    # pandas 1.0, so the keyword form `path=...` raises TypeError on current
    # pandas. The positional form works on old and new versions alike.
    age = pd.read_pickle(
        join(hlp.DATA_DIR, 'full_age_vector_intersection.npy'))
    age = age.sort_index()

    # Plot the distribution of the age vector
    sns.set_palette('Accent')
    sns.set_style('white')
    # NOTE(review): sns.distplot is deprecated since seaborn 0.11; consider
    # sns.histplot(age, kde=True, stat='density') once the minimum supported
    # seaborn version is confirmed.
    sns.distplot(age)
    plt.show()

    # Optional: restrict the analysis to users older than 35 (disabled)
    #age = age[age>35]
    #selection_ids = list(age.index)
    #ae.filter(users=selection_ids,inplace=True)
Ejemplo n.º 5
0
# m-decline grid search
import mobiledna.core.help as hlp
from os.path import join
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_validate
from xgboost import XGBRegressor, XGBRFRegressor, DMatrix, plot_importance,train, XGBClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, StratifiedKFold
import matplotlib.pyplot as plt
import seaborn as sns

if __name__ == '__main__':
    # Set some parameters
    hlp.hi('M-decline grid search')
    hlp.set_param(data_dir=join(hlp.DATA_DIR, 'm-decline_pred'), log_level=1)

    # Get data
    # The path is passed positionally on purpose: the first parameter of
    # pd.read_pickle was renamed from `path` to `filepath_or_buffer` in
    # pandas 1.0, so the keyword form `path=...` raises TypeError on current
    # pandas. The positional form works on old and new versions alike.
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    age = pd.read_pickle(join(hlp.DATA_DIR, 'full_age_vector_intersection.npy'))
    age = age.sort_index()

    # Feature matrix, indexed and sorted by 'id'.
    # NOTE(review): presumably this ordering lines the rows up with the sorted
    # age vector -- confirm both share the same set of ids.
    features = pd.read_csv(join(hlp.DATA_DIR, 'feature_matrix.csv')).set_index('id').sort_index()
    X = features.values
    y = age

    # Discretise age into bins. np.digitize returns i such that
    # bins[i-1] <= age < bins[i]: 1 for [0, 28), 2 for [28, 40),
    # 3 for [40, 100), 4 for >= 100 (and 0 for negative values).
    bins = np.array([0, 28, 40, 100])
    inds = np.digitize(age, bins)

    # Re-encode the bin indices that actually occur as consecutive labels 0..k-1
    label_encoder = LabelEncoder()
    y_new = label_encoder.fit_transform(inds)

    xgb = XGBClassifier()