def main():
    """Compare the installed-apps predictor with an MFU baseline per user.

    Users with fewer than 20 sessions are skipped. For the others the
    sessions are handed to ``split(sessions, 0.84)``; ``train_1``/``test``
    produce one accuracy and ``MFUPredictor`` (asked for 4 candidates)
    produces the other. The mean of both accuracies is printed at the end.
    """
    db_helper = ApeicDBHelper()
    accs = []
    mfu_accs = []
    for user in db_helper.get_users():
        sessions = db_helper.get_sessions(user)
        if len(sessions) < 20:
            continue
        # Trailing comma: under the Python 2 print statement this suppresses
        # the newline, so the MFU accuracy printed below lands on this line.
        print(colored(user, attrs=['blink'])),

        training_sessions, testing_sessions = split(sessions, 0.84)
        # Flatten once; both the counter and the MFU predictor need the logs.
        training_logs = list(itertools.chain(*training_sessions))
        testing_logs = list(itertools.chain(*testing_sessions))

        counter = Counter(log['application'] for log in training_logs)
        installed_apps = train_1(training_sessions)
        accs.append(test(testing_sessions, installed_apps, counter))

        predictor = MFUPredictor()
        predictor.train(training_logs)
        # The second element of the result is not used here.
        mfu_acc, _ = predictor.test(testing_logs, 4)
        print(mfu_acc)
        mfu_accs.append(mfu_acc)

    if not accs:
        # Nothing to average; avoids a ZeroDivisionError below.
        print('no user has at least 20 sessions')
        return
    print('%s %s' % (sum(accs) / len(accs), sum(mfu_accs) / len(mfu_accs)))
def main():
    """Print per-user and mean accuracy of two app predictors.

    For every user with at least 20 sessions, the sessions go through
    ``split(sessions, 0.84)``. One accuracy comes from ``train_1`` + ``test``,
    the other from ``MFUPredictor.test`` with 4 candidates. The two means
    over all evaluated users are printed last.
    """
    db_helper = ApeicDBHelper()
    users = db_helper.get_users()
    accs = []
    mfu_accs = []
    for user in users:
        sessions = db_helper.get_sessions(user)
        if len(sessions) < 20:
            continue
        # Python 2 print statement with trailing comma: no newline, so the
        # accuracy printed further down continues the same output line.
        print(colored(user, attrs=['blink'])),

        training_sessions, testing_sessions = split(sessions, 0.84)
        flat_training = list(itertools.chain(*training_sessions))
        flat_testing = list(itertools.chain(*testing_sessions))

        # Launch counts per application over the training logs.
        counter = Counter(entry['application'] for entry in flat_training)
        installed_apps = train_1(training_sessions)
        acc = test(testing_sessions, installed_apps, counter)
        accs.append(acc)

        predictor = MFUPredictor()
        predictor.train(flat_training)
        mfu_acc = predictor.test(flat_testing, 4)[0]
        print(mfu_acc)
        mfu_accs.append(mfu_acc)

    if len(accs) == 0:
        # Guard: the averages below would divide by zero.
        print('no user has at least 20 sessions')
        return
    print('%s %s' % (sum(accs) / len(accs), sum(mfu_accs) / len(mfu_accs)))
def main():
    """Run ``Preprocessor.extract_stay_points`` on the logs of every user."""
    helper = ApeicDBHelper()
    for uid in helper.get_users():
        Preprocessor(helper.get_logs(uid)).extract_stay_points()
def main():
    """Print the user count, then each user's app repeatability report."""
    all_users = ApeicDBHelper().get_users()
    print(len(all_users))
    for uid in all_users:
        print(colored(uid, attrs=['blink']))
        print_app_repeatability(uid)
        # Blank separator line between users.
        print('')
def main():
    """Pool the logs of all users and pass them to the real-data analyzer."""
    # TODO: ignore some user's data?
    db = ApeicDBHelper()
    pooled_logs = [entry for uid in db.get_users() for entry in db.get_logs(uid)]
    RealDataAnalyzer().get_env_context_distrs(pooled_logs)
def main():
    """Evaluate a multinomial naive Bayes app predictor on every user.

    Each user's sessions go through ``split(sessions, 0.8)``. A
    ``MultinomialNB`` is fitted on instances generated from the aggregated
    training sessions, and every log of the testing sessions counts as a hit
    when its application is among the 4 most probable classes.

    NOTE(review): ``hits`` and ``misses`` are never reset, so the accuracy
    printed for a user is cumulative over all users processed so far, and the
    final figure is the mean of those running values. Confirm this is
    intended.
    """
    hits = 0.0
    misses = 0.0
    db_helper = ApeicDBHelper()
    accuracies = []
    for user in db_helper.get_users():
        if user == '11d1ef9f845ec10e':
            continue
        print(colored(user, attrs=['blink']))

        sessions = db_helper.get_sessions(user)
        # Only this split is ever used. A de-duplicated session list used to
        # be built and split first, but its result was overwritten at once.
        training_sessions, testing_sessions = split(sessions, 0.8)

        extractor = FeatureExtractor()
        X, y = extractor.generate_training_instances(
            aggregate_sessions(training_sessions))
        predictor = MultinomialNB().fit(X, y)

        last_used_app = ''
        for session in testing_sessions:
            for log in session:
                instance = extractor.transform(last_used_app, log)
                # Classes ordered by descending predicted probability.
                ranking = sorted(
                    zip(predictor.classes_, predictor.predict_proba(instance)[0]),
                    key=operator.itemgetter(1), reverse=True)
                candidates = [app for app, _ in ranking[:4]]
                if log['application'] in candidates:
                    hits += 1.0
                else:
                    misses += 1.0
                last_used_app = log['application']

        total = hits + misses
        if total == 0:
            # Nothing has been scored yet; avoid dividing by zero.
            continue
        acc = hits / total
        accuracies.append(acc)
        print('%s %s %s' % (acc, hits, misses))

    if accuracies:
        print(sum(accuracies) / len(accuracies))
def get_app_usage_distrs(self):
    """Build per-user, per-application feature distributions.

    The logs of each user are formatted with ``self._format`` and grouped by
    their last field. Every group becomes one entry keyed ``'<app>_<n>'``,
    where ``n`` counts the groups seen so far for that application, starting
    at 1. The value is a tuple holding ``self._get_feature_distr(i, cluster)``
    for each index ``i`` of ``self.feature_names``.

    Returns:
        dict mapping ``'<app>_<n>'`` to a tuple of feature distributions.
    """
    app_usage_distrs = {}
    # Number of clusters already stored per application. Counting exact
    # matches of ``app`` among the ``'<app>_<n>'`` keys would always give 0,
    # so every user would overwrite the ``'<app>_1'`` entry.
    clusters_per_app = {}
    db = ApeicDBHelper()
    for user in db.get_users():
        logs = [self._format(log)
                for log in itertools.chain(*db.get_sessions(user))]
        # groupby only merges adjacent items, hence the sort on the same key.
        user_traces = sorted(logs, key=itemgetter(-1))
        for app, traces in groupby(user_traces, itemgetter(-1)):
            cluster = list(traces)
            clusters_per_app[app] = clusters_per_app.get(app, 0) + 1
            key = '%s_%d' % (app, clusters_per_app[app])
            app_usage_distrs[key] = tuple(
                self._get_feature_distr(i, cluster)
                for i in range(len(self.feature_names)))
    return app_usage_distrs
def compute_repeatability(user):
    """Return the user's apps ranked by mean launches per session.

    For every session the launches of each application are counted. An
    application's score is the average of those counts over the sessions in
    which it appears. The result is a list of ``(application, score)`` pairs,
    highest score first.
    """
    per_session_counts = defaultdict(list)
    for session in ApeicDBHelper().get_sessions(user):
        launches = Counter(entry['application'] for entry in session)
        for app, times in launches.iteritems():
            per_session_counts[app].append(times)

    ranking = [(app, sum(counts) / float(len(counts)))
               for app, counts in per_session_counts.iteritems()]
    ranking.sort(key=operator.itemgetter(1), reverse=True)
    return ranking
def get_installation_history(user):
    """Return the user's installed applications grouped by start date.

    Reads ``application`` and ``start_date`` from the
    ``<user>_installed_apps`` table and returns a list of
    ``(date, [application, ...])`` pairs ordered by date.
    """
    apps = ApeicDBHelper.select('%s_installed_apps' % user,
                                select_items=('application', 'start_date'))
    history = defaultdict(list)
    # Explicit loop: map() used only for side effects hides the intent and
    # does nothing at all where map() is lazy.
    for row in apps:
        history[row[1].date()].append(row[0])
    return sorted(history.items(), key=operator.itemgetter(0))
def get_uninstallation_history(user):
    """Return the user's removed applications grouped by end date.

    Reads ``application`` and ``end_date`` from the ``<user>_installed_apps``
    table, restricted to rows whose ``end_date`` is not NULL, and returns a
    list of ``(date, [application, ...])`` pairs ordered by date.
    """
    apps = DB.select('%s_installed_apps' % user,
                     select_items=('application', 'end_date'),
                     where_items={'end_date IS NOT': None})
    history = defaultdict(list)
    # Explicit loop: map() used only for side effects hides the intent and
    # does nothing at all where map() is lazy.
    for row in apps:
        history[row[1].date()].append(row[0])
    return sorted(history.items(), key=operator.itemgetter(0))
def main():
    """Evaluate ``TAPPredictor`` over all users and print the hit rate.

    For each user the logs are split with ``split(logs)`` and a fresh
    predictor is trained on the training part. Every testing log from the
    third one onwards is predicted from the applications of the two logs
    before it. Hits and misses are pooled over all users.

    NOTE(review): ``k`` (number of candidates) is not defined in this
    function; presumably it is a module-level name - confirm.
    """
    db_helper = ApeicDBHelper()
    hits = 0.0
    misses = 0.0
    for user in db_helper.get_users():
        training_data, testing_data = split(db_helper.get_logs(user))
        predictor = TAPPredictor()
        predictor.train(training_data)

        apps = [log['application'] for log in testing_data]
        # Sliding window of three consecutive launches: two as context, one
        # as the target.
        for before_previous, previous, actual in zip(apps, apps[1:], apps[2:]):
            candidates = predictor.predict(before_previous, previous, k)
            if actual in candidates:
                hits += 1.0
            else:
                misses += 1.0

    total = hits + misses
    # Report 0 instead of dividing by zero when nothing was scored.
    print('%s %s' % (k, hits / total if total > 0 else 0))
def get_segment_len_distrs(self):
    """Count, per session length, how sessions break into segments.

    The most frequent application of a session is its initiator. Sessions of
    length <= 2 contribute the one-element tuple ``(length,)``. Longer
    sessions contribute a tuple made of a ``1`` for every launch before the
    first initiator occurrence, followed by the distances between
    consecutive initiator occurrences (the session end closes the last one).

    Returns:
        dict mapping session length to the ``Counter(...).items()`` of the
        tuples observed for that length.
    """
    shapes_by_len = defaultdict(list)
    db = ApeicDBHelper()
    for user in db.get_users():
        for session in db.get_sessions(user):
            apps = [entry['application'] for entry in session]
            n = len(apps)
            initiator = max(set(apps), key=apps.count)
            if n <= 2:
                shapes_by_len[n].append((n, ))
                continue

            # Positions of the initiator, closed off by the session end.
            cuts = [pos for pos, app in enumerate(apps) if app == initiator]
            cuts.append(n)
            shape = (1, ) * cuts[0]
            shape += tuple(hi - lo for lo, hi in zip(cuts, cuts[1:]))
            shapes_by_len[n].append(shape)

    return dict((n, Counter(shapes).items())
                for n, shapes in shapes_by_len.items())
def main():
    """Evaluate ``APPRushPredictor`` per user and print the mean accuracy.

    The logs of each user are split with ``predictor.split(logs, 0.8)``. A
    prediction (5 candidates) is scored whenever the application differs
    from that of the previous log, and also for the last testing log. Users
    for which nothing could be scored are left out of the mean.
    """
    db_helper = ApeicDBHelper()
    accuracies = []
    for user in db_helper.get_users():
        if user == '11d1ef9f845ec10e':
            continue
        print(colored(user, attrs=['blink']))

        logs = db_helper.get_logs(user)
        predictor = APPRushPredictor()
        training_logs, testing_logs = predictor.split(logs, 0.8)
        predictor.train(training_logs)
        if len(testing_logs) == 0:
            # No testing data: indexing testing_logs[0] would fail.
            continue

        hits = 0.0
        misses = 0.0
        final_id = testing_logs[-1]['id']
        last_log = testing_logs[0]
        for log in testing_logs[1:]:
            switched = log['application'] != last_log['application']
            if switched or log['id'] == final_id:
                candidates = predictor.predict(last_log, log, 5)
                if log['application'] in candidates:
                    hits += 1
                else:
                    misses += 1
            last_log = log

        total = hits + misses
        if total == 0:
            # No scored prediction for this user; avoid dividing by zero.
            continue
        acc = hits / total
        accuracies.append(acc)
        print(acc)

    if accuracies:
        print(sum(accuracies) / len(accuracies))
def main():
    """Print per-user and mean top-5 accuracy of ``APPRushPredictor``.

    Each user's logs go through ``predictor.split(logs, 0.8)``; the
    predictor is trained on the first part. Over the testing logs a
    prediction is scored when the application changes with respect to the
    previous log, or when the log is the last testing log. Users without any
    scored prediction do not contribute to the mean.
    """
    db_helper = ApeicDBHelper()
    users = db_helper.get_users()
    accuracies = []
    for user in users:
        if user == '11d1ef9f845ec10e':
            continue
        print(colored(user, attrs=['blink']))

        predictor = APPRushPredictor()
        training_logs, testing_logs = predictor.split(
            db_helper.get_logs(user), 0.8)
        predictor.train(training_logs)
        if len(testing_logs) == 0:
            # Guard: testing_logs[0] below needs at least one log.
            continue

        hits = 0.0
        misses = 0.0
        last_id = testing_logs[-1]['id']
        last_log = testing_logs[0]
        for log in testing_logs[1:]:
            app = log['application']
            if app != last_log['application'] or log['id'] == last_id:
                if app in predictor.predict(last_log, log, 5):
                    hits += 1
                else:
                    misses += 1
            last_log = log

        scored = hits + misses
        if scored == 0:
            # Guard against ZeroDivisionError when nothing was scored.
            continue
        acc = hits / scored
        accuracies.append(acc)
        print(acc)

    if len(accuracies) > 0:
        print(sum(accuracies) / len(accuracies))
def get_segment_len_distrs(self):
    """Tabulate segment-length patterns of sessions, keyed by session length.

    The initiator of a session is its most frequent application. A session
    with at most two launches yields ``(session_len,)``. Otherwise it yields
    one ``1`` per launch preceding the first initiator occurrence, then the
    gaps between successive initiator positions, the final gap running to
    the end of the session.

    Returns:
        dict: session length -> ``Counter(patterns).items()``.
    """
    patterns = defaultdict(list)
    db = ApeicDBHelper()
    for user in db.get_users():
        for session in db.get_sessions(user):
            launched = [record['application'] for record in session]
            length = len(launched)
            initiator = max(set(launched), key=launched.count)

            if length > 2:
                marks = [idx for idx, name in enumerate(launched)
                         if name == initiator] + [length]
                # Launches before the first initiator count as unit segments.
                lead = (1, ) * marks[0]
                gaps = tuple(b - a for a, b in zip(marks[:-1], marks[1:]))
                patterns[length].append(lead + gaps)
            else:
                patterns[length].append((length, ))

    return dict((length, Counter(found).items())
                for length, found in patterns.items())
def main():
    """Evaluate ``ApeicPredictor`` fed with naive Bayes class probabilities.

    Per user, the sessions go through ``split(sessions, 0.8)``. A
    ``MultinomialNB`` is fitted on instances generated from the aggregated
    training sessions, and an ``ApeicPredictor`` is updated with each
    training session. Every launch of the testing sessions is then predicted
    (5 candidates) and the predictor is updated with each finished testing
    session.

    Printed per user: accuracy, share of misses that happened on the first
    launch of a session, that miss count, the number of missed applications
    unknown to the NB model, total misses, and total predictions.

    NOTE(review): ``hits`` and ``misses`` are deliberately not reset per
    user, so these figures are cumulative across users, while
    ``trigger_misses`` and ``new`` are per user.
    """
    hits = 0.0
    misses = 0.0
    db_helper = ApeicDBHelper()
    users = db_helper.get_users()
    # users = ['475f258ecc566658']
    accuracies = []
    trigger_errors = []
    for user in users:
        if user == '11d1ef9f845ec10e':
            continue
        sessions = db_helper.get_sessions(user)
        training_sessions, testing_sessions = split(sessions, 0.8)

        extractor = FeatureExtractor()
        X, y = extractor.generate_training_instances(
            aggregate_sessions(training_sessions))
        nb_predictor = MultinomialNB().fit(X, y)

        print(colored(user, attrs=['blink']))
        predictor = ApeicPredictor()
        for session in training_sessions:
            predictor.update(session)
        if len(testing_sessions) == 0:
            # Nothing to evaluate; testing_sessions[0] below would fail.
            continue

        new = 0.0
        trigger_misses = 0.0
        last_session = testing_sessions[0]
        last = ''
        for session in testing_sessions:
            for i, log in enumerate(session):
                instance = extractor.transform(last, log)
                # Class -> probability map. No extra keyword arguments:
                # dict() would store them as additional entries.
                ei = dict(zip(nb_predictor.classes_,
                              nb_predictor.predict_proba(instance)[0]))
                candidates = predictor.predict(
                    session[:i], last_session[-1]['application'], ei, 5)
                if log['application'] in candidates:
                    hits += 1.0
                else:
                    misses += 1.0
                    if log['application'] not in ei:
                        new += 1.0
                    if i == 0:
                        trigger_misses += 1.0
            last_session = session
            last = session[0]['application']
            predictor.update(session)

        total = hits + misses
        if total == 0:
            # Nothing has been scored so far; avoid dividing by zero.
            continue
        acc = hits / total
        trigger_error = trigger_misses / misses if misses > 0 else 0
        accuracies.append(acc)
        trigger_errors.append(trigger_error)
        print('%s %s %s %s %s %s' % (
            acc, trigger_error, trigger_misses, new, misses, total))

    if accuracies:
        print('%s %s' % (sum(accuracies) / len(accuracies),
                         sum(trigger_errors) / len(trigger_errors)))
def test(k=4, ignore_initiator=True):
    """Compare APEIC with the NB, LU, MRU and MFU predictors.

    For each predictor two numbers are printed: the hit rate (hits over
    hits + misses) and the accumulated reciprocal rank of the hits divided
    by the number of predictions.

    Args:
        k: maximum number of candidates per prediction.
        ignore_initiator: when true, the first launch of every testing
            session is skipped and not scored.

    NOTE(review): the ``break`` at the end of the user loop stops the
    evaluation after the first user that has at least one scored prediction.
    Confirm this is intended.
    """
    db_helper = ApeicDBHelper()
    users = filter(lambda x: x != '11d1ef9f845ec10e', db_helper.get_users())

    # Totals over all users; ``*_m`` sums the reciprocal ranks of the hits.
    total_hits = 0.0
    total_misses = 0.0
    m = 0.0
    initial_misses = 0.0
    nb_total_hits = 0.0
    nb_total_misses = 0.0
    nb_m = 0.0
    lu_total_hits = 0.0
    lu_total_misses = 0.0
    lu_m = 0.0
    mru_total_hits = 0.0
    mru_total_misses = 0.0
    mru_m = 0.0
    mfu_total_hits = 0.0
    mfu_total_misses = 0.0
    mfu_m = 0.0
    for user in users:
        # print colored(user, attrs=['blink'])
        sessions = db_helper.get_sessions(user)
        training_sessions, testing_sessions = split(sessions, 0.8)
        past_launches = map(lambda x: x['application'], chain(*training_sessions))
        # MRU baseline starts from the last k launches of the training data.
        mru_candidates = past_launches[-k:]
        # l1 / l2: most recent and second most recent launch (LU inputs).
        l1 = past_launches[-1]
        l2 = past_launches[-2]
        used_apps = []
        for session in training_sessions:
            used_apps += map(lambda x: x['application'], session)
        counter = Counter(used_apps)
        # MFU baseline: the k most launched training apps; never updated.
        mfu_candidates = map(lambda x: x[0], counter.most_common(k))
        predictor = ApeicPredictor()
        predictor.train(training_sessions)
        lu_predictor = LUPredictor(0.25)
        lu_predictor.train(list(chain(*training_sessions)))
        # Per-user APEIC counters.
        hits = 0.0
        misses = 0.0
        # initial_misses = 0.0
        unseen_misses = 0.0
        starter = ''
        terminator = ''
        last_app = ''
        last_date = ''
        # ss = 1
        for session in testing_sessions:
            # The string below is disabled debugging code kept as a no-op.
            """
            if last_date != session[0]['datetime'].day and last_date != '':
                if hits + misses > 0:
                    print ss, hits/(hits + misses)
                else:
                    pass
                    print 'zero'
                # hits = 0.0
                # misses = 0.0
                ss += 1
                # if ss == 7:
                #     sys.exit()
            """
            # print '==='
            # if len(session) > 0:
            #     print '\n'.join(map(lambda x: x['application'], session))
            #     print
            for i in xrange(len(session)):
                if ignore_initiator and i == 0:
                    continue
                # NOTE(review): session[:i + 1] includes the launch being
                # predicted - confirm what the predictor reads from it.
                apeic_candidates, nb_candidates = predictor.predict(
                    session[:i + 1], last_app, terminator, k)
                assert len(apeic_candidates) <= k and len(nb_candidates) <= k
                lu_candidates = lu_predictor.predict(l2, l1, k)
                if session[i]['application'] in apeic_candidates:
                    total_hits += 1.0
                    hits += 1.0
                    m += 1.0 / (
                        apeic_candidates.index(session[i]['application']) + 1)
                else:
                    total_misses += 1.0
                    # print i, session[i]['application']
                    # print '\t', apeic_candidates
                    misses += 1.0
                    if i == 0:
                        initial_misses += 1.0
                    # First launch of a session, app absent from training.
                    if session[i]['application'] not in counter.keys(
                    ) and i == 0:
                        # print session[i]['application']
                        unseen_misses += 1.0
                if session[i]['application'] in nb_candidates:
                    nb_total_hits += 1.0
                    nb_m += 1.0 / (
                        nb_candidates.index(session[i]['application']) + 1)
                else:
                    nb_total_misses += 1.0
                if session[i]['application'] in lu_candidates:
                    lu_total_hits += 1.0
                    lu_m += 1.0 / (
                        lu_candidates.index(session[i]['application']) + 1)
                else:
                    lu_total_misses += 1.0
                if session[i]['application'] in mru_candidates:
                    mru_total_hits += 1.0
                    mru_m += 1.0 / (
                        mru_candidates.index(session[i]['application']) + 1)
                else:
                    mru_total_misses += 1.0
                if session[i]['application'] in mfu_candidates:
                    mfu_total_hits += 1.0
                    mfu_m += 1.0 / (
                        mfu_candidates.index(session[i]['application']) + 1)
                else:
                    mfu_total_misses += 1.0
                # Slide the MRU window and the two-launch history forward.
                mru_candidates = mru_candidates[1:] + [
                    session[i]['application']
                ]
                l2 = l1
                l1 = session[i]['application']
                last_app = session[i]['application']
            starter = session[0]['application']
            terminator = session[-1]['application']
            last_date = session[0]['datetime'].day
            # predictor.update(session)
        if hits + misses == 0:
            continue
        acc = (hits) / (hits + misses)
        # print unseen_misses
        print acc, hits, misses, initial_misses, unseen_misses
        break
    print k
    print colored('APEIC', 'cyan'), \
        (total_hits)/(total_hits + total_misses), m/(total_hits + total_misses), initial_misses, total_hits, total_misses
    print colored('NB   ', 'cyan'), \
        (nb_total_hits)/(nb_total_hits + nb_total_misses), nb_m/(nb_total_hits + nb_total_misses)
    print colored('LU   ', 'cyan'), \
        (lu_total_hits)/(lu_total_hits + lu_total_misses), lu_m/(lu_total_hits + lu_total_misses)
    print colored('MRU  ', 'cyan'), \
        (mru_total_hits)/(mru_total_hits + mru_total_misses), mru_m/(mru_total_hits + mru_total_misses)
    print colored('MFU  ', 'cyan'), \
        (mfu_total_hits)/(mfu_total_hits + mfu_total_misses), mfu_m/(mfu_total_hits +
                                                                       mfu_total_misses)
def get_session_len_distr(verbose=True):
    """Compute the length and distinct-app count of every session.

    Sessions of all users are pooled. ``session_lengths`` holds the number
    of launches of each session and ``app_numbers`` the number of distinct
    applications in it.

    NOTE(review): the visible body ends after computing these two lists. It
    neither returns nor prints them, and ``verbose`` is unused. The rest of
    the function may be missing - confirm against the original file.
    """
    db_helper = ApeicDBHelper()
    # Materialise the chain: a bare iterator would be exhausted by the first
    # pass, leaving nothing for the second.
    sessions = list(chain(*map(lambda x: db_helper.get_sessions(x),
                               db_helper.get_users())))
    session_lengths = [len(session) for session in sessions]
    app_numbers = [len(set(log['application'] for log in session))
                   for session in sessions]
def main():
    """Score ``ApeicPredictor`` combined with a naive Bayes model per user.

    Each user's sessions are passed to ``split(sessions, 0.8)``. The
    training part fits a ``MultinomialNB`` (through ``FeatureExtractor``)
    and is replayed into an ``ApeicPredictor`` via ``update``. Each launch of
    the testing sessions is predicted with 5 candidates, and the predictor
    is updated after every testing session.

    One line is printed per user: accuracy, fraction of misses that occurred
    on a session's first launch, count of such misses, count of missed apps
    absent from the NB classes, misses, and number of predictions. The means
    of the first two values are printed last.

    NOTE(review): ``hits``/``misses`` accumulate over all users (their
    per-user reset is commented out), whereas ``trigger_misses`` and ``new``
    restart for each user.
    """
    hits = 0.0
    misses = 0.0
    db_helper = ApeicDBHelper()
    users = db_helper.get_users()
    # users = ['475f258ecc566658']
    accuracies = []
    trigger_errors = []
    for user in users:
        if user == '11d1ef9f845ec10e':
            continue
        sessions = db_helper.get_sessions(user)
        training_sessions, testing_sessions = split(sessions, 0.8)

        logs = aggregate_sessions(training_sessions)
        extractor = FeatureExtractor()
        X, y = extractor.generate_training_instances(logs)
        nb_predictor = MultinomialNB().fit(X, y)

        print(colored(user, attrs=['blink']))
        predictor = ApeicPredictor()
        for session in training_sessions:
            predictor.update(session)
        if len(testing_sessions) == 0:
            # Guard: testing_sessions[0] below needs at least one session.
            continue

        new = 0.0
        trigger_misses = 0.0
        last_session = testing_sessions[0]
        last = ''
        for session in testing_sessions:
            for i, log in enumerate(session):
                app = log['application']
                instance = extractor.transform(last, log)
                probabilities = nb_predictor.predict_proba(instance)[0]
                # Only class -> probability pairs belong in this map; any
                # keyword argument to dict() would become an extra key.
                ei = dict(zip(nb_predictor.classes_, probabilities))
                candidates = predictor.predict(
                    session[:i], last_session[-1]['application'], ei, 5)
                if app in candidates:
                    hits += 1.0
                    continue
                misses += 1.0
                if app not in ei:
                    new += 1.0
                if i == 0:
                    trigger_misses += 1.0
            last_session = session
            last = session[0]['application']
            predictor.update(session)

        predictions = hits + misses
        if predictions == 0:
            # Avoid ZeroDivisionError when nothing has been scored yet.
            continue
        acc = hits / predictions
        trigger_error = trigger_misses / misses if misses > 0 else 0
        accuracies.append(acc)
        trigger_errors.append(trigger_error)
        print('%s %s %s %s %s %s' % (
            acc, trigger_error, trigger_misses, new, misses, predictions))

    if len(accuracies) > 0:
        print('%s %s' % (sum(accuracies) / len(accuracies),
                         sum(trigger_errors) / len(trigger_errors)))
def test(k=4, ignore_initiator=True):
    """Evaluate APEIC against the NB, LU, MRU and MFU baselines.

    Prints, for each predictor, its hit rate and the sum of reciprocal ranks
    of its hits divided by the number of predictions.

    Args:
        k: maximum number of candidates returned per prediction.
        ignore_initiator: when true, the first launch of each testing
            session is not scored.

    NOTE(review): because of the ``break`` closing the user loop, only the
    first user with at least one scored prediction is evaluated. Confirm
    this is intended.
    """
    db_helper = ApeicDBHelper()
    users = filter(lambda x: x != '11d1ef9f845ec10e', db_helper.get_users())

    # Global counters; the ``*_m`` ones add 1/rank for every hit.
    total_hits = 0.0
    total_misses = 0.0
    m = 0.0
    initial_misses = 0.0
    nb_total_hits = 0.0
    nb_total_misses = 0.0
    nb_m = 0.0
    lu_total_hits = 0.0
    lu_total_misses = 0.0
    lu_m = 0.0
    mru_total_hits = 0.0
    mru_total_misses = 0.0
    mru_m = 0.0
    mfu_total_hits = 0.0
    mfu_total_misses = 0.0
    mfu_m = 0.0
    for user in users:
        # print colored(user, attrs=['blink'])
        sessions = db_helper.get_sessions(user)
        training_sessions, testing_sessions = split(sessions, 0.8)
        past_launches = map(lambda x: x['application'], chain(*training_sessions))
        # Initial MRU window: the k latest training launches.
        mru_candidates = past_launches[-k:]
        # Last and second-to-last launch, passed to the LU predictor.
        l1 = past_launches[-1]
        l2 = past_launches[-2]
        used_apps = []
        for session in training_sessions:
            used_apps += map(lambda x: x['application'], session)
        counter = Counter(used_apps)
        # Fixed MFU list: the k most frequent training applications.
        mfu_candidates = map(lambda x: x[0], counter.most_common(k))
        predictor = ApeicPredictor()
        predictor.train(training_sessions)
        lu_predictor = LUPredictor(0.25)
        lu_predictor.train(list(chain(*training_sessions)))
        # APEIC counters for the current user only.
        hits = 0.0
        misses = 0.0
        # initial_misses = 0.0
        unseen_misses = 0.0
        starter = ''
        terminator = ''
        last_app = ''
        last_date = ''
        # ss = 1
        for session in testing_sessions:
            # Disabled debugging code, kept as a no-op string expression.
            """
            if last_date != session[0]['datetime'].day and last_date != '':
                if hits + misses > 0:
                    print ss, hits/(hits + misses)
                else:
                    pass
                    print 'zero'
                # hits = 0.0
                # misses = 0.0
                ss += 1
                # if ss == 7:
                #     sys.exit()
            """
            # print '==='
            # if len(session) > 0:
            #     print '\n'.join(map(lambda x: x['application'], session))
            #     print
            for i in xrange(len(session)):
                if ignore_initiator and i == 0:
                    continue
                # NOTE(review): the slice session[:i+1] contains the launch
                # under prediction - confirm how the predictor uses it.
                apeic_candidates, nb_candidates = predictor.predict(session[:i+1], last_app, terminator, k)
                assert len(apeic_candidates) <= k and len(nb_candidates) <= k
                lu_candidates = lu_predictor.predict(l2, l1, k)
                if session[i]['application'] in apeic_candidates:
                    total_hits += 1.0
                    hits += 1.0
                    m += 1.0/(apeic_candidates.index(session[i]['application']) + 1)
                else:
                    total_misses += 1.0
                    # print i, session[i]['application']
                    # print '\t', apeic_candidates
                    misses += 1.0
                    if i == 0:
                        initial_misses += 1.0
                    # Session-opening launch of an app never seen in training.
                    if session[i]['application'] not in counter.keys() and i == 0:
                        # print session[i]['application']
                        unseen_misses += 1.0
                if session[i]['application'] in nb_candidates:
                    nb_total_hits += 1.0
                    nb_m += 1.0/(nb_candidates.index(session[i]['application']) + 1)
                else:
                    nb_total_misses += 1.0
                if session[i]['application'] in lu_candidates:
                    lu_total_hits += 1.0
                    lu_m += 1.0/(lu_candidates.index(session[i]['application']) + 1)
                else:
                    lu_total_misses += 1.0
                if session[i]['application'] in mru_candidates:
                    mru_total_hits += 1.0
                    mru_m += 1.0/(mru_candidates.index(session[i]['application']) + 1)
                else:
                    mru_total_misses += 1.0
                if session[i]['application'] in mfu_candidates:
                    mfu_total_hits += 1.0
                    mfu_m += 1.0/(mfu_candidates.index(session[i]['application']) + 1)
                else:
                    mfu_total_misses += 1.0
                # Advance the MRU window and the two-launch history.
                mru_candidates = mru_candidates[1:] + [session[i]['application']]
                l2 = l1
                l1 = session[i]['application']
                last_app = session[i]['application']
            starter = session[0]['application']
            terminator = session[-1]['application']
            last_date = session[0]['datetime'].day
            # predictor.update(session)
        if hits + misses == 0:
            continue
        acc = (hits)/(hits + misses)
        # print unseen_misses
        print acc, hits, misses, initial_misses, unseen_misses
        break
    print k
    print colored('APEIC', 'cyan'), \
        (total_hits)/(total_hits + total_misses), m/(total_hits + total_misses), initial_misses, total_hits, total_misses
    print colored('NB   ', 'cyan'), \
        (nb_total_hits)/(nb_total_hits + nb_total_misses), nb_m/(nb_total_hits + nb_total_misses)
    print colored('LU   ', 'cyan'), \
        (lu_total_hits)/(lu_total_hits + lu_total_misses), lu_m/(lu_total_hits + lu_total_misses)
    print colored('MRU  ', 'cyan'), \
        (mru_total_hits)/(mru_total_hits + mru_total_misses), mru_m/(mru_total_hits + mru_total_misses)
    print colored('MFU  ', 'cyan'), \
        (mfu_total_hits)/(mfu_total_hits + mfu_total_misses), mfu_m/(mfu_total_hits + mfu_total_misses)