def _stay(group):
    """Summarise the ``stay`` column of *group* with confidence bounds.

    ``group['stay']`` is handed to ``binomial_confidence_mean``; the returned
    ``(value, confidence)`` pair is flattened into a dict with the keys
    ``value``, ``confidence_interval_min`` and ``confidence_interval_max``.
    """
    estimate, interval = binomial_confidence_mean(group['stay'])
    lower, upper = interval[0], interval[1]
    return {
        'value': estimate,
        'confidence_interval_min': lower,
        'confidence_interval_max': upper,
    }
def _weighted_mean(xs):
    """Return the confidence-bounded mean of *xs* together with its size.

    The estimate and its interval come from ``binomial_confidence_mean``;
    ``size`` is ``len(xs)``, so *xs* must be a sized collection.
    """
    estimate, interval = binomial_confidence_mean(xs)
    summary = {'value': estimate}
    summary['confidence_interval_min'] = interval[0]
    summary['confidence_interval_max'] = interval[1]
    summary['size'] = len(xs)
    return summary
def _progress_confidence(i):
    """Estimate the share of ``user_answers`` entries that are greater than *i*.

    ``user_answers`` is taken from the enclosing scope. Each entry is turned
    into a boolean (``entry > i``) and the booleans are summarised by
    ``binomial_confidence_mean``.

    Returns a dict with ``value``, ``confidence_interval_min`` and
    ``confidence_interval_max``.
    """
    # Build a real list instead of using map(): on Python 3 map() returns a
    # one-shot iterator without len(), which a helper expecting a sequence
    # cannot consume. On Python 2 the result is unchanged.
    exceeded = [x > i for x in user_answers]
    value, confidence = binomial_confidence_mean(exceeded)
    return {
        'value': value,
        'confidence_interval_min': confidence[0],
        'confidence_interval_max': confidence[1],
    }
def returning(data):
    """Estimate the share of users whose highest ``session_number`` exceeds 0.

    *data* is grouped by ``user_id``; every user contributes one boolean
    flag. The flags are summarised by ``binomial_confidence_mean`` and the
    number of flags is reported as ``size``.
    """
    def _came_back(user_rows):
        # True when this user has any session numbered above 0.
        return user_rows['session_number'].max() > 0

    per_user_flags = data.groupby('user_id').apply(_came_back).values
    estimate, interval = binomial_confidence_mean(per_user_flags)
    return {
        'value': estimate,
        'confidence_interval_min': interval[0],
        'confidence_interval_max': interval[1],
        'size': len(per_user_flags),
    }
def get_stats(self, experiment_setup_ids, answers_per_user=10, learning_curve_length=5, learning_curve_max_users=1000):
    """Collect per-experiment-setup usage statistics.

    For every requested setup the query counts, per user, the answers and
    the distinct sessions recorded under that setup. Only users with more
    than ``answers_per_user`` answers are kept (``HAVING COUNT(...) > %s``).

    Args:
        experiment_setup_ids: iterable of experiment setup ids. Any iterable
            is accepted; it is materialised into a list once.
        answers_per_user: users must have strictly more answers than this.
        learning_curve_length: forwarded to ``learning_curve``.
        learning_curve_max_users: forwarded to ``learning_curve`` as
            ``number_of_users``.

    Returns:
        dict mapping each requested setup id to its statistics. An empty
        dict is returned when no ids are given.
    """
    # Materialise once: the ids are iterated several times and concatenated
    # with a list below, which would fail for tuples, sets or generators.
    experiment_setup_ids = list(experiment_setup_ids)
    if not experiment_setup_ids:
        # "IN ()" is not valid SQL and there is nothing to report anyway.
        return {}
    placeholders = ', '.join(['%s'] * len(experiment_setup_ids))
    with closing(connection.cursor()) as cursor:
        cursor.execute(
            '''
            SELECT
                proso_configab_answerexperimentsetup.experiment_setup_id,
                proso_models_answer.user_id,
                COUNT(proso_models_answer.id) as number_of_answers,
                COUNT(DISTINCT(proso_models_answer.session_id)) number_of_sessions
            FROM proso_models_answer
            INNER JOIN proso_configab_answerexperimentsetup
                ON proso_configab_answerexperimentsetup.answer_id = proso_models_answer.id
            WHERE proso_configab_answerexperimentsetup.experiment_setup_id IN (''' + placeholders + ''')
            GROUP BY
                proso_configab_answerexperimentsetup.experiment_setup_id,
                proso_models_answer.user_id
            HAVING COUNT(proso_models_answer.id) > %s
            ''',
            experiment_setup_ids + [answers_per_user]
        )
        fetched = defaultdict(list)
        experiment_users = defaultdict(set)
        # Rows are (experiment_setup_id, user_id, answers, sessions).
        for row in cursor:
            experiment_users[row[0]].add(row[1])
            fetched[row[0]].append({
                'number_of_answers': row[2],
                'number_of_sessions': row[3]
            })
    result = {}
    for experiment_setup_id in experiment_setup_ids:
        if experiment_setup_id in fetched:
            data = fetched[experiment_setup_id]
            users = experiment_users[experiment_setup_id]
            result[experiment_setup_id] = {
                'number_of_users': len(data),
                'number_of_answers': confidence_value_to_json(confidence_median([d['number_of_answers'] for d in data])),
                # A user counts as returning when more than one distinct session was seen.
                'returning_chance': confidence_value_to_json(
                    binomial_confidence_mean([d['number_of_sessions'] > 1 for d in data])),
                'learning_curve': learning_curve(learning_curve_length, users=users, number_of_users=learning_curve_max_users),
                'learning_curve_all_users': learning_curve(learning_curve_length, users=users, number_of_users=learning_curve_max_users, user_length=1)
            }
        else:
            # NOTE(review): this branch reports 'number_of_answers_median'
            # while the branch above uses 'number_of_answers'. Kept as is
            # because callers may rely on it; confirm which key is intended.
            result[experiment_setup_id] = {
                'number_of_users': 0,
                'number_of_answers_median': None,
                'returning_chance': None,
            }
    return result
def attrition_bias(data, length=6, context_answer_limit=100):
    """Summarise users' first-answer outcome by how many answers they gave.

    *data* is grouped by ``(context_name, term_type)``. Groups with fewer
    than ``context_answer_limit`` rows are ignored. Within each remaining
    group, every user gets a list of flags, one per row, that is true when
    ``item_asked_id`` differs from ``item_answered_id``.

    For each ``i`` in ``range(length)`` the first flag of every user having
    more than ``i`` flags is fed to ``binomial_confidence_mean``.

    Returns a list of ``length`` dicts with ``value``,
    ``confidence_interval_min`` and ``confidence_interval_max``.
    """
    def _attrition_bias(group):
        # Contexts with too few rows contribute no users at all.
        if len(group) < context_answer_limit:
            return []
        flags_by_user = defaultdict(list)
        for record in iterdicts(group):
            mismatch = record['item_asked_id'] != record['item_answered_id']
            flags_by_user[record['user_id']].append(mismatch)
        return flags_by_user.values()

    per_context = data.groupby(['context_name', 'term_type']).apply(_attrition_bias)
    # Flatten: one flag list per (context, user) pair.
    user_answers = [
        answers
        for context_answers in per_context
        for answers in context_answers
    ]

    def _point(i):
        # First flag of every user that produced more than ``i`` flags.
        first_flags = [answers[0] for answers in user_answers if len(answers) > i]
        estimate, interval = binomial_confidence_mean(first_flags)
        return {
            'value': estimate,
            'confidence_interval_min': interval[0],
            'confidence_interval_max': interval[1],
        }

    return [_point(i) for i in range(length)]
def get_stats(self, experiment_setup_ids, answers_per_user=10, learning_curve_length=5, learning_curve_max_users=1000):
    """Collect per-experiment-setup usage statistics.

    The query counts, per experiment setup and user, the answers and the
    distinct sessions. Users are kept only when they have strictly more
    than ``answers_per_user`` answers.

    Args:
        experiment_setup_ids: iterable of experiment setup ids (list, tuple,
            set, generator, ...); it is converted to a list once.
        answers_per_user: exclusive lower bound on answers per user.
        learning_curve_length: forwarded to ``learning_curve``.
        learning_curve_max_users: forwarded to ``learning_curve`` as
            ``number_of_users``.

    Returns:
        dict keyed by the requested setup ids; ``{}`` when none are given.
    """
    # Convert once so that non-list iterables can be re-iterated and
    # concatenated with the trailing HAVING parameter.
    experiment_setup_ids = list(experiment_setup_ids)
    if not experiment_setup_ids:
        # Avoid emitting "IN ()", which the database rejects.
        return {}
    placeholders = ', '.join(['%s'] * len(experiment_setup_ids))
    with closing(connection.cursor()) as cursor:
        cursor.execute(
            '''
            SELECT
                proso_configab_answerexperimentsetup.experiment_setup_id,
                proso_models_answer.user_id,
                COUNT(proso_models_answer.id) as number_of_answers,
                COUNT(DISTINCT(proso_models_answer.session_id)) number_of_sessions
            FROM proso_models_answer
            INNER JOIN proso_configab_answerexperimentsetup
                ON proso_configab_answerexperimentsetup.answer_id = proso_models_answer.id
            WHERE proso_configab_answerexperimentsetup.experiment_setup_id IN (''' + placeholders + ''')
            GROUP BY
                proso_configab_answerexperimentsetup.experiment_setup_id,
                proso_models_answer.user_id
            HAVING COUNT(proso_models_answer.id) > %s
            ''', experiment_setup_ids + [answers_per_user])
        fetched = defaultdict(list)
        experiment_users = defaultdict(set)
        # Each row: (experiment_setup_id, user_id, answers, sessions).
        for row in cursor:
            experiment_users[row[0]].add(row[1])
            fetched[row[0]].append({
                'number_of_answers': row[2],
                'number_of_sessions': row[3]
            })
    result = {}
    for experiment_setup_id in experiment_setup_ids:
        if experiment_setup_id in fetched:
            data = fetched[experiment_setup_id]
            users = experiment_users[experiment_setup_id]
            result[experiment_setup_id] = {
                'number_of_users': len(data),
                'number_of_answers': confidence_value_to_json(
                    confidence_median(
                        [d['number_of_answers'] for d in data])),
                # More than one distinct session marks a returning user.
                'returning_chance': confidence_value_to_json(
                    binomial_confidence_mean(
                        [d['number_of_sessions'] > 1 for d in data])),
                'learning_curve': learning_curve(
                    learning_curve_length, users=users,
                    number_of_users=learning_curve_max_users),
                'learning_curve_all_users': learning_curve(
                    learning_curve_length, users=users,
                    number_of_users=learning_curve_max_users,
                    user_length=1)
            }
        else:
            # NOTE(review): the key here is 'number_of_answers_median' but
            # the populated branch uses 'number_of_answers'. Left unchanged
            # for compatibility; confirm which key consumers expect.
            result[experiment_setup_id] = {
                'number_of_users': 0,
                'number_of_answers_median': None,
                'returning_chance': None,
            }
    return result
def _mean_with_confidence(xs):
    """Return the JSON form of the confidence-bounded mean of *xs*.

    ``None`` entries are dropped before the remaining values are passed to
    ``binomial_confidence_mean``; its result is converted with
    ``confidence_value_to_json``.
    """
    known = [x for x in xs if x is not None]
    estimate = binomial_confidence_mean(known)
    return confidence_value_to_json(estimate)