# Example #1
                groupby('id').value_counts(normalize=normalize).rename(name)

        if application:
            applications = [application] if not isinstance(application, list) else application

            return (self.__data__.groupby(['id', 'session']).application.first().isin(applications)). \
                groupby('id').value_counts(normalize=normalize).rename(name)


if __name__ == "__main__":
    ###########
    # EXAMPLE #
    ###########

    # Initialize the helper module and raise log verbosity for this demo.
    hlp.hi()
    hlp.set_param(log_level=3)

    # Read sample data: one user's processed appevents parquet, annotated
    # with date columns via hlp.add_dates (index name 'appevents').
    data = hlp.add_dates(
        pd.read_parquet(
            path='../../data/glance/processed_appevents/0a0fe3ed-d788-4427-8820-8b7b696a6033_appevents.parquet'),
        'appevents')

    # Data path (raw, unprocessed appevents for the same user)
    data_path = '../../data/glance/appevents/0a0fe3ed-d788-4427-8820-8b7b696a6033_appevents.parquet'

    # More sample data: two additional users' raw appevents logs.
    data2 = pd.read_parquet(path='../../data/glance/appevents/0a9edba1-14e3-466a-8d0c-f8a8170cefc8_appevents.parquet')
    data3 = pd.read_parquet(path='../../data/glance/appevents/0a48d1e8-ead2-404a-a5a2-6b05371200b1_appevents.parquet')
    # Concatenate all three users and re-annotate dates on the combined frame.
    data4 = hlp.add_dates(pd.concat([data, data2, data3], sort=True), 'appevents')
from mobiledna.core.appevents import Appevents
from os.path import join, pardir
from os import listdir
from mobiledna.core import help as hlp
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.preprocessing import normalize, scale
from scipy.stats import pearsonr
import matplotlib.pyplot as plt

if __name__ == '__main__':

    # Initialize helper module: greet, set low log verbosity, and point the
    # working directory at the pickled appevents objects folder.
    hlp.hi()
    hlp.set_param(log_level=1)
    hlp.set_dir(join(pardir, pardir, 'data', 'glance', 'objects', 'appevents'))

    # Set dirs: pickled objects, raw appevents, and processed appevents.
    ae_objects_dir = join(pardir, pardir, 'data', 'glance', 'objects',
                          'appevents')
    ae_dir = join(pardir, pardir, 'data', 'glance', 'appevents')
    ae_processed_dir = join(pardir, pardir, 'data', 'glance',
                            'processed_appevents')

    # Get file names (sorted for a deterministic processing order)
    ae_data_files = sorted(listdir(ae_dir))

    # Accumulator for the concatenated result of all processed files.
    big_data = pd.DataFrame()

    # Loop over files and process ([:1] restricts this run to the first file)
    for ae_data_file in tqdm(ae_data_files[:1], desc='Processing appevents'):
        # NOTE(review): the loop body is truncated in this view of the file.
# Example #3
from sklearn.dummy import DummyRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xg
from xgboost import XGBRegressor, plot_importance
import mobiledna.test.mdecline_features as mf

if __name__ == '__main__':

    # Set some parameters: banner + data directory for the age-prediction run.
    hlp.hi('M-decline age prediction')
    hlp.set_param(data_dir=join(hlp.DATA_DIR, 'm-decline_pred'), log_level=1)

    # Get data: a pickled Appevents object plus a matching age vector
    # (presumably indexed by user id — confirm against the pickle contents).
    ae = Appevents.from_pickle(join(hlp.DATA_DIR, 'ae_full_intersection.npy'))
    age = pd.read_pickle(
        path=join(hlp.DATA_DIR, 'full_age_vector_intersection.npy'))
    age = age.sort_index()

    # Quick visual check of the age distribution.
    sns.set_palette('Accent')
    sns.set_style('white')
    sns.distplot(age)
    plt.show()

    # Commented-out experiment: restrict the sample to users older than 35.
    #age = age[age>35]
    #selection_ids = list(age.index)
    #ae.filter(users=selection_ids,inplace=True)
from os.path import join
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_validate
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xg

if __name__ == '__main__':

    # Set some parameters: banner + data directory for the implicit-attitude run.
    hlp.hi('Implicit attitude & mobileDNA')
    hlp.set_param(data_dir=join(hlp.DATA_DIR, 'implicit'), log_level=1)

    # Get data: tab-separated survey responses and semicolon-separated log
    # data; the log's first column is dropped (presumably a row index —
    # verify against the CSV).
    survey_data = pd.read_csv(join(hlp.DATA_DIR, 'data.csv'), sep='\t')
    log_data = pd.read_csv(join(hlp.DATA_DIR, 'log_data.csv'),
                           sep=';').iloc[:, 1:]

    # Build object from the raw log frame; category and date annotation are
    # skipped, and strip=True trims the data (see Appevents for semantics).
    ae = Appevents(log_data,
                   add_categories=False,
                   add_date_annotation=False,
                   strip=True)
    #ae = Appevents.from_pickle(path=join(hlp.DATA_DIR, 'implicit.ae'))
    # Free the raw frame once it is wrapped in the Appevents object.
    del log_data

    # Filter object (only users with over two weeks of logging)
# Example #5
        # Get new name (subtract date, add day of the week)
        new_col = time_col[:-4] + 'TOD'

        # Process each row
        tqdm.pandas(desc=f"Adding tod <{time_col}>", position=0, leave=True)
        df[new_col] = hours.progress_apply(label_hour)

    return df


if __name__ == '__main__':
    # Initialize helper: cache dir, processed-appevents data dir, low verbosity.
    hlp.hi()
    hlp.set_dir(join(pardir, 'cache'))
    hlp.set_param(log_level=1,
                  data_dir=join(pardir, pardir, 'data', 'glance',
                                'processed_appevents'),
                  cache_dir=join(pardir, 'cache'))

    # Load the data and gather apps
    log('Collecting app names.', lvl=1)
    appevents_files = listdir(hlp.DATA_DIR)
    apps = {}

    # Load data (only the first appevents file is read here)
    data = hlp.load(path=join(hlp.DATA_DIR, appevents_files[0]),
                    index='appevents')

    # Add apps to the set (no duplicates): count occurrences per application
    # name and merge the counts into the running `apps` dict.
    # NOTE(review): the dict-merge overwrites rather than sums counts if
    # `apps` is ever non-empty when this runs — confirm intent before looping.
    app_counts = Counter(list(data.application))
    apps = {**apps, **app_counts}