def extract(self, session, info, meta):
    """Emit feedback features from accumulated state, then absorb `session`.

    Features are read from the counters *before* the current session is
    folded in, so they only describe sessions seen on earlier calls.

    Args:
        session: object exposing `event_data`, a pandas Series of JSON
            strings (each element is passed through `json.loads`).
        info: object whose `should_include` flag decides whether features
            are emitted for this session.
        meta: unused here.

    Returns:
        The result of `U.prefix_keys(features, 'fb_')`; `features` is empty
        when `info.should_include` is falsy.
    """
    features = OrderedDict()

    if info.should_include:
        pos = self.pos_feedback
        neg = self.neg_feedback
        other = self.other_feedback
        total = pos + neg + other

        features['pos_feedback'] = pos
        features['neg_feedback'] = neg
        features['other_feedback'] = other
        # 9999 is handed to U.savediv as its third argument (presumably the
        # value used when the denominator is zero -- confirm in U).
        features['pos_neg_ratio'] = U.savediv(pos, neg, 9999)
        features['pos_all_ratio'] = U.savediv(pos, total, 9999)
        features['total_feedback'] = total
        features.update(U.prefix_keys(self.cnt_char_feedback, 'char_'))

    # `pd.io.json.json_normalize` was deprecated in pandas 1.0 in favour of
    # the top-level `pd.json_normalize`. Prefer the new entry point and fall
    # back to the legacy path on older pandas.
    normalize = getattr(pd, 'json_normalize', None)
    if normalize is None:
        normalize = pd.io.json.json_normalize

    # State is updated on every call, regardless of `should_include`.
    data = normalize(session.event_data.apply(json.loads))
    self.update_feedback(data)

    return U.prefix_keys(features, 'fb_')
def extract(self, session, info, meta):
    """Emit mean/std of the accumulated cyclic timestamp encodings.

    For every `ts_<dt>_<sin|cos>` key built from `CYCLIC_FEATURES`, the mean
    and standard deviation of the values stored in `self.acc[key]` are
    emitted (0 when nothing is stored yet), and the current session's values
    for that column are then appended to the history.

    NOTE(review): the history is extended only when `info.should_include`
    is truthy, whereas the other extractors in this file update their state
    on every session. Confirm this is intended.

    Args:
        session: indexable by column name; each column must offer `tolist()`.
        info: object with a `should_include` flag.
        meta: unused here.

    Returns:
        The result of `U.prefix_keys(features, 'cycl_')`.
    """
    features = OrderedDict()
    if not info.should_include:
        return U.prefix_keys(features, 'cycl_')

    history = self.acc
    keys = (
        f'ts_{dt}_{angle}'
        for dt in CYCLIC_FEATURES
        for angle in ('sin', 'cos')
    )
    for key in keys:
        seen = history[key]
        if seen:
            mean, std = np.mean(seen), np.std(seen)
        else:
            mean, std = 0, 0
        features[f'{key}_mean'] = mean
        features[f'{key}_std'] = std
        # Extend only after the statistics above have been taken.
        history[key] += session[key].tolist()

    return U.prefix_keys(features, 'cycl_')
def extract(self, session, info, meta):
    """Emit event-data counter features, then absorb `session`.

    When `info.should_include` is truthy, two groups of features are built
    from the counters as they stood before this session:

    * `var_<name>`: how many entries of `self.cnt_<name>` have a truthy
      value;
    * `<name>_<key>`: a copy of each counter with its keys prefixed.

    Afterwards the session's JSON event payloads are normalised into a
    frame and passed to every `update_*` method, on every call.

    Args:
        session: object exposing `event_data`, a pandas Series of JSON
            strings (each element is passed through `json.loads`).
        info: object with a `should_include` flag.
        meta: unused here.

    Returns:
        The result of `U.prefix_keys(features, 'event_')`.
    """
    features = OrderedDict()

    if info.should_include:
        # The two tuples differ in order on purpose: they reproduce the
        # original key insertion order of the feature dict.
        for name in ('media', 'source', 'level', 'weight', 'size'):
            tallies = getattr(self, f'cnt_{name}')
            features[f'var_{name}'] = sum(1 for v in tallies.values() if v)
        for name in ('media', 'source', 'level', 'size', 'weight'):
            tallies = getattr(self, f'cnt_{name}')
            features.update(U.prefix_keys(tallies.copy(), f'{name}_'))

    # `pd.io.json.json_normalize` was deprecated in pandas 1.0 in favour of
    # the top-level `pd.json_normalize`. Prefer the new entry point and fall
    # back to the legacy path on older pandas.
    normalize = getattr(pd, 'json_normalize', None)
    if normalize is None:
        normalize = pd.io.json.json_normalize
    data = normalize(session.event_data.apply(json.loads))

    updaters = (
        self.update_round,
        self.update_coord,
        self.update_media,
        self.update_source,
        self.update_levels,
        self.update_sizes,
        self.update_weights,
    )
    for update in updaters:
        update(data)

    return U.prefix_keys(features, 'event_')
def extract(self, session, info, meta):
    """Emit, per tracked counter, the number of entries with non-zero count.

    The features describe the counters before the current session is
    added; the counters are then updated from `session` on every call.

    Args:
        session: forwarded to `self.update_counters`.
        info: object with a `should_include` flag.
        meta: unused here.

    Returns:
        The result of `U.prefix_keys(features, 'var_')`.
    """
    tracked = ('title_event_code', 'title', 'event_code', 'event_id')

    features = OrderedDict()
    if info.should_include:
        features.update(
            (name, np.count_nonzero(list(getattr(self, f'cnt_{name}').values())))
            for name in tracked
        )

    # Each counter attribute is `cnt_<name>` and is updated under `<name>`.
    for name in tracked:
        self.update_counters(getattr(self, f'cnt_{name}'), session, name)

    return U.prefix_keys(features, 'var_')
def extract(self, session, info, meta):
    """Emit min/max/mean/std of the timestamp parts collected so far.

    For each history list (`months`, `dows`, `doms`, `hours`) the four
    aggregates are computed through `U.guard_false` and stored under
    `<func name>_<attribute>`. The session's own timestamp columns are
    appended to the histories afterwards, on every call.

    Args:
        session: indexable by column name; each column must offer `tolist()`.
        info: object with a `should_include` flag.
        meta: unused here.

    Returns:
        The result of `U.prefix_keys(features, 'ts_')`.
    """
    # (history attribute on self, session column feeding it)
    sources = (
        ('months', 'ts_Month'),
        ('dows', 'ts_Dayofweek'),
        ('doms', 'ts_Day'),
        ('hours', 'ts_Hour'),
    )
    aggregates = (min, max, np.mean, np.std)

    features = OrderedDict()
    if info.should_include:
        for attr, _ in sources:
            history = getattr(self, attr)
            for func in aggregates:
                features[f'{func.__name__}_{attr}'] = U.guard_false(func, history)

    for attr, column in sources:
        getattr(self, attr).extend(session[column].tolist())

    return U.prefix_keys(features, 'ts_')
def extract(self, session, info, meta):
    """Emit raw counters plus most/least frequent keys, then update state.

    When `info.should_include` is truthy the feature dict receives every
    entry of the tracked counters, followed by the key with the highest
    and the lowest count for six of them. The features reflect the state
    before the current session. Counters are then updated from `session`,
    and the activity counter is bumped when the session type differs from
    the previous one.

    Args:
        session: forwarded to `self.update_counters`.
        info: object with `should_include` and `session_type`.
        meta: unused here.

    Returns:
        The result of `U.prefix_keys(features, 'cnt_')`.
    """
    def extreme(pick, cnt):
        # `pick` is `max` or `min`; ties resolve to the first item yielded.
        return pick(cnt.items(), key=itemgetter(1))[0]

    features = OrderedDict()

    if info.should_include:
        merged = (
            self.cnt_title_event_code,
            self.cnt_title,
            self.cnt_event_code,
            self.cnt_event_id,
            self.cnt_activities,
            self.cnt_worlds,
            self.cnt_types,
            self.cnt_title_worlds,
            self.cnt_title_types,
            self.cnt_world_types,
        )
        for counter in merged:
            features.update(counter.items())

        ranked = (
            ('title', self.cnt_title),
            ('world', self.cnt_worlds),
            ('type', self.cnt_types),
            ('title_world', self.cnt_title_worlds),
            ('title_type', self.cnt_title_types),
            ('world_type', self.cnt_world_types),
        )
        for label, counter in ranked:
            features[f'most_freq_{label}'] = extreme(max, counter)
            features[f'least_freq_{label}'] = extreme(min, counter)

    refreshed = (
        (self.cnt_title_event_code, 'title_event_code'),
        (self.cnt_title, 'title'),
        (self.cnt_event_code, 'event_code'),
        (self.cnt_event_id, 'event_id'),
        (self.cnt_worlds, 'world'),
        (self.cnt_types, 'type'),
        (self.cnt_title_worlds, 'title_world'),
        (self.cnt_title_types, 'title_type'),
        (self.cnt_world_types, 'world_type'),
    )
    for counter, column in refreshed:
        self.update_counters(counter, session, column)

    # Count an activity once per run of consecutive sessions of one type.
    activity = info.session_type
    if self.last_activity is None or self.last_activity != activity:
        self.cnt_activities[activity] += 1
        self.last_activity = activity

    return U.prefix_keys(features, 'cnt_')
def extract(self, session, info, meta):
    """Emit performance features and the `accuracy_group` target.

    For an included session, every feature is read from the running state
    *before* that state is updated with the session's own outcome, so the
    features only describe earlier sessions. The target is computed from
    the current session and attached to the result unprefixed.

    `self.acc_actions` grows by `len(session)` on every call, included or
    not.

    Args:
        session: sized object; only `len(session)` is used.
        info: object with `should_include`, `outcomes` (`pos`, `neg`,
            `total`), `session_title` and `duration_seconds`.
        meta: unused here.

    Returns:
        An empty `OrderedDict` when `info.should_include` is falsy.
        Otherwise the result of `U.prefix_keys(features, 'perf_')` with an
        extra, unprefixed `'accuracy_group'` entry added last.
    """
    features = OrderedDict()
    target = None

    if info.should_include:
        outcomes = info.outcomes
        title = info.session_title

        features['acc_attempts_pos'] = self.acc_correct_attempts
        features['acc_attempts_neg'] = self.acc_incorrect_attempts
        self.acc_correct_attempts += outcomes.pos
        self.acc_incorrect_attempts += outcomes.neg

        features['acc_accuracy'] = U.savediv(self.acc_accuracy, self.n_rows)
        accuracy = U.savediv(outcomes.pos, outcomes.total)
        self.acc_accuracy += accuracy

        # Latest accuracy per title as of the previous included session.
        features.update(self.last_accuracy_title)
        self.last_accuracy_title[f'acc_{title}'] = accuracy

        # The target is kept in a local rather than in `features`, so it is
        # never prefixed and cannot leak into the feature columns.
        target = to_accuracy_group(accuracy)
        self.accuracy_groups[target] += 1
        features['acc_accuracy_group'] = U.savediv(
            self.acc_accuracy_group, self.n_rows)
        self.acc_accuracy_group += target

        features['acc_actions'] = self.acc_actions

        features['duration_mean'] = (
            np.mean(self.durations) if self.durations else 0)
        features['total_duration'] = sum(self.durations)
        self.durations.append(info.duration_seconds)

        features['clip_duration_mean'] = U.guard_false(
            np.mean, self.clip_durations)
        features['clip_total_duration'] = sum(self.clip_durations)
        # NOTE(review): clip durations are only recorded for included
        # sessions; confirm that is intended.
        self.clip_durations.append(duration.CLIPS.get(title, 0))
        features['clip_ratio'] = U.savediv(
            features['clip_total_duration'], features['total_duration'])

        self.n_rows += 1

    self.acc_actions += len(session)

    if not info.should_include:
        return features

    features = U.prefix_keys(features, 'perf_')
    features['accuracy_group'] = target
    return features
def extract(self, session, info, meta):
    """Emit the timestamp-part counters gathered so far, then update them.

    Each counter is copied into the feature dict under its own key prefix
    (`month_`, `dow_`, `dom_`, `hour_`, `minute_`) when
    `info.should_include` is truthy. The counters are then updated from the
    matching session column on every call.

    Args:
        session: forwarded to `self.update_counters`.
        info: object with a `should_include` flag.
        meta: unused here.

    Returns:
        The result of `U.prefix_keys(features, 'ts_')`.
    """
    # (counter attribute, feature key prefix, session column)
    spec = (
        ('cnt_month', 'month_', 'ts_Month'),
        ('cnt_dayofweek', 'dow_', 'ts_Dayofweek'),
        ('cnt_dayofmonth', 'dom_', 'ts_Day'),
        ('cnt_hour', 'hour_', 'ts_Hour'),
        ('cnt_minute', 'minute_', 'ts_Minute'),
    )

    features = OrderedDict()
    if info.should_include:
        for attr, prefix, _ in spec:
            features.update(U.prefix_keys(getattr(self, attr), prefix))

    for attr, _, column in spec:
        self.update_counters(getattr(self, attr), session, column)

    return U.prefix_keys(features, 'ts_')