def _stay(group):
     """Summarise ``group['stay']`` as a point value plus interval bounds.

     The ``'stay'`` entry of ``group`` is handed to
     ``binomial_confidence_mean``; its ``(value, confidence)`` result is
     flattened into a dict with the keys ``value``,
     ``confidence_interval_min`` and ``confidence_interval_max``.
     """
     estimate = binomial_confidence_mean(group['stay'])
     mean, interval = estimate
     summary = {'value': mean}
     summary['confidence_interval_min'] = interval[0]
     summary['confidence_interval_max'] = interval[1]
     return summary
 def _weighted_mean(xs):
     """Describe ``xs`` by its ``binomial_confidence_mean`` estimate and size.

     Returns a dict holding the estimated ``value``, the lower and upper
     interval bounds (``confidence_interval_min`` /
     ``confidence_interval_max``) and ``size``, the number of items in ``xs``.
     """
     estimate = binomial_confidence_mean(xs)
     mean, interval = estimate
     summary = {'value': mean}
     summary['confidence_interval_min'] = interval[0]
     summary['confidence_interval_max'] = interval[1]
     summary['size'] = len(xs)
     return summary
 def _progress_confidence(i):
     """Estimate the share of ``user_answers`` entries that exceed ``i``.

     ``user_answers`` is taken from an enclosing scope that is not part of
     this snippet (NOTE(review): presumably one number per user -- confirm
     against the caller).  Each entry is turned into the boolean
     ``entry > i`` and the resulting flags are passed to
     ``binomial_confidence_mean``.

     Returns a dict with the keys ``value``, ``confidence_interval_min`` and
     ``confidence_interval_max``.
     """
     # Build a concrete list instead of a lazy ``map`` object: on Python 3 a
     # map iterator has no ``len()`` and can only be consumed once, whereas
     # the other call sites of ``binomial_confidence_mean`` pass real
     # sequences.
     exceeded = [x > i for x in user_answers]
     value, confidence = binomial_confidence_mean(exceeded)
     return {
         'value': value,
         'confidence_interval_min': confidence[0],
         'confidence_interval_max': confidence[1],
     }
def returning(data):
    """Estimate how often a user has a ``session_number`` above zero.

    ``data`` is grouped by ``user_id``; every group yields one boolean that
    tells whether its largest ``session_number`` is greater than 0.  The
    flags are summarised with ``binomial_confidence_mean``.

    Returns a dict with ``value``, ``confidence_interval_min``,
    ``confidence_interval_max`` and ``size`` (the number of flags).
    """
    per_user = data.groupby('user_id')
    came_back = per_user.apply(
        lambda sessions: sessions['session_number'].max() > 0
    ).values
    estimate = binomial_confidence_mean(came_back)
    mean, interval = estimate
    summary = {'value': mean}
    summary['confidence_interval_min'] = interval[0]
    summary['confidence_interval_max'] = interval[1]
    summary['size'] = len(came_back)
    return summary
예제 #5
0
 def get_stats(self, experiment_setup_ids, answers_per_user=10, learning_curve_length=5, learning_curve_max_users=1000):
     """Collect per-experiment-setup usage statistics.

     One query aggregates, for every (experiment setup, user) pair among the
     requested setups, the number of answers and the number of distinct
     sessions.  Pairs are kept only when the answer count is strictly greater
     than ``answers_per_user``.

     Args:
         experiment_setup_ids: iterable of experiment setup ids to report on.
         answers_per_user: a user is counted only with more answers than this.
         learning_curve_length: first positional argument of ``learning_curve``.
         learning_curve_max_users: passed to ``learning_curve`` as
             ``number_of_users``.

     Returns:
         A dict keyed by experiment setup id.  An id with at least one
         qualifying user maps to ``number_of_users``, ``number_of_answers``
         (``confidence_median`` of the per-user answer counts),
         ``returning_chance`` (``binomial_confidence_mean`` of "more than one
         session"), ``learning_curve`` and ``learning_curve_all_users`` (the
         latter computed with ``user_length=1``).  Any other id maps to a
         placeholder with ``number_of_users`` 0 and ``None`` statistics.  An
         empty ``experiment_setup_ids`` yields ``{}`` without touching the
         database.
     """
     # Materialise once: the ids are iterated several times and concatenated
     # with a list below, so tuples and other iterables must work as well.
     experiment_setup_ids = list(experiment_setup_ids)
     if not experiment_setup_ids:
         # No ids would render ``IN ()``, which e.g. PostgreSQL rejects as a
         # syntax error; the loop below would produce an empty dict anyway.
         return {}
     placeholders = ', '.join(['%s' for _ in experiment_setup_ids])
     with closing(connection.cursor()) as cursor:
         cursor.execute(
             '''
             SELECT
                 proso_configab_answerexperimentsetup.experiment_setup_id,
                 proso_models_answer.user_id,
                 COUNT(proso_models_answer.id) as number_of_answers,
                 COUNT(DISTINCT(proso_models_answer.session_id)) number_of_sessions
             FROM proso_models_answer
             INNER JOIN proso_configab_answerexperimentsetup ON proso_configab_answerexperimentsetup.answer_id = proso_models_answer.id
             WHERE proso_configab_answerexperimentsetup.experiment_setup_id IN (''' + placeholders + ''')
             GROUP BY proso_configab_answerexperimentsetup.experiment_setup_id, proso_models_answer.user_id
             HAVING COUNT(proso_models_answer.id) > %s
             ''',
             experiment_setup_ids + [answers_per_user]
         )
         # Per setup: one stats record per qualifying user, and the user ids.
         fetched = defaultdict(list)
         experiment_users = defaultdict(set)
         for row in cursor:
             experiment_users[row[0]].add(row[1])
             fetched[row[0]].append({
                 'number_of_answers': row[2],
                 'number_of_sessions': row[3]
             })
         result = {}
         for experiment_setup_id in experiment_setup_ids:
             if experiment_setup_id in fetched:
                 data = fetched[experiment_setup_id]
                 users = experiment_users[experiment_setup_id]
                 result[experiment_setup_id] = {
                     'number_of_users': len(data),
                     'number_of_answers': confidence_value_to_json(confidence_median([d['number_of_answers'] for d in data])),
                     'returning_chance': confidence_value_to_json(
                         binomial_confidence_mean([d['number_of_sessions'] > 1 for d in data])),
                     'learning_curve': learning_curve(learning_curve_length, users=users, number_of_users=learning_curve_max_users),
                     'learning_curve_all_users': learning_curve(learning_curve_length, users=users, number_of_users=learning_curve_max_users, user_length=1)
                 }
             else:
                 result[experiment_setup_id] = {
                     'number_of_users': 0,
                     # Same key as the populated branch, so consumers can read
                     # 'number_of_answers' unconditionally.
                     'number_of_answers': None,
                     # Legacy key, retained for callers that already use it.
                     'number_of_answers_median': None,
                     'returning_chance': None,
                 }
         return result
def attrition_bias(data, length=6, context_answer_limit=100):
    """Compute first-answer error estimates for users with enough answers.

    ``data`` is grouped by ``context_name`` and ``term_type``.  Groups with
    fewer than ``context_answer_limit`` rows are ignored; in every other
    group each user's rows become a list of booleans saying whether
    ``item_asked_id`` differs from ``item_answered_id``.

    For each ``i`` in ``range(length)`` the *first* flag of every list longer
    than ``i`` is collected and summarised by ``binomial_confidence_mean``.

    Returns a list of ``length`` dicts with the keys ``value``,
    ``confidence_interval_min`` and ``confidence_interval_max``.
    """

    def _attrition_bias(group):
        # Contexts with too few rows contribute nothing.
        if len(group) < context_answer_limit:
            return []
        mistakes_by_user = defaultdict(list)
        for record in iterdicts(group):
            is_mistake = record['item_asked_id'] != record['item_answered_id']
            mistakes_by_user[record['user_id']].append(is_mistake)
        return mistakes_by_user.values()

    # Flatten the per-context collections into one list of per-user lists.
    user_answers = []
    for context_answers in data.groupby(['context_name', 'term_type']).apply(_attrition_bias):
        user_answers.extend(context_answers)

    result = []
    for i in range(length):
        first_answers = [answers[0] for answers in user_answers if len(answers) > i]
        estimate = binomial_confidence_mean(first_answers)
        mean, interval = estimate
        point = {'value': mean}
        point['confidence_interval_min'] = interval[0]
        point['confidence_interval_max'] = interval[1]
        result.append(point)
    return result
예제 #7
0
 def get_stats(self,
               experiment_setup_ids,
               answers_per_user=10,
               learning_curve_length=5,
               learning_curve_max_users=1000):
     """Collect per-experiment-setup usage statistics.

     A single query counts answers and distinct sessions for every
     (experiment setup, user) pair among the requested setups, keeping only
     pairs whose answer count is strictly greater than ``answers_per_user``.

     Args:
         experiment_setup_ids: iterable of experiment setup ids to report on.
         answers_per_user: a user is counted only with more answers than this.
         learning_curve_length: first positional argument of ``learning_curve``.
         learning_curve_max_users: passed to ``learning_curve`` as
             ``number_of_users``.

     Returns:
         A dict keyed by experiment setup id.  Ids with qualifying users map
         to ``number_of_users``, ``number_of_answers``, ``returning_chance``,
         ``learning_curve`` and ``learning_curve_all_users``; all other ids
         map to a placeholder with ``number_of_users`` 0 and ``None``
         statistics.  An empty ``experiment_setup_ids`` yields ``{}`` and no
         query is executed.
     """
     # Work on a list: the ids are iterated more than once and joined with a
     # list of query parameters, which would fail for tuples or generators.
     experiment_setup_ids = list(experiment_setup_ids)
     if not experiment_setup_ids:
         # Without ids the statement would contain ``IN ()``, which e.g.
         # PostgreSQL rejects; the result would be empty regardless.
         return {}
     placeholders = ', '.join(['%s' for _ in experiment_setup_ids])
     with closing(connection.cursor()) as cursor:
         cursor.execute(
             '''
             SELECT
                 proso_configab_answerexperimentsetup.experiment_setup_id,
                 proso_models_answer.user_id,
                 COUNT(proso_models_answer.id) as number_of_answers,
                 COUNT(DISTINCT(proso_models_answer.session_id)) number_of_sessions
             FROM proso_models_answer
             INNER JOIN proso_configab_answerexperimentsetup ON proso_configab_answerexperimentsetup.answer_id = proso_models_answer.id
             WHERE proso_configab_answerexperimentsetup.experiment_setup_id IN ('''
             + placeholders + ''')
             GROUP BY proso_configab_answerexperimentsetup.experiment_setup_id, proso_models_answer.user_id
             HAVING COUNT(proso_models_answer.id) > %s
             ''', experiment_setup_ids + [answers_per_user])
         # Per setup: one stats record per qualifying user, and the user ids.
         fetched = defaultdict(list)
         experiment_users = defaultdict(set)
         for row in cursor:
             experiment_users[row[0]].add(row[1])
             fetched[row[0]].append({
                 'number_of_answers': row[2],
                 'number_of_sessions': row[3]
             })
         result = {}
         for experiment_setup_id in experiment_setup_ids:
             if experiment_setup_id in fetched:
                 data = fetched[experiment_setup_id]
                 users = experiment_users[experiment_setup_id]
                 result[experiment_setup_id] = {
                     'number_of_users':
                     len(data),
                     'number_of_answers':
                     confidence_value_to_json(
                         confidence_median(
                             [d['number_of_answers'] for d in data])),
                     'returning_chance':
                     confidence_value_to_json(
                         binomial_confidence_mean(
                             [d['number_of_sessions'] > 1 for d in data])),
                     'learning_curve':
                     learning_curve(
                         learning_curve_length,
                         users=users,
                         number_of_users=learning_curve_max_users),
                     'learning_curve_all_users':
                     learning_curve(
                         learning_curve_length,
                         users=users,
                         number_of_users=learning_curve_max_users,
                         user_length=1)
                 }
             else:
                 result[experiment_setup_id] = {
                     'number_of_users': 0,
                     # Same key as the populated branch, so consumers can read
                     # 'number_of_answers' unconditionally.
                     'number_of_answers': None,
                     # Legacy key, retained for callers that already use it.
                     'number_of_answers_median': None,
                     'returning_chance': None,
                 }
         return result
예제 #8
0
 def _mean_with_confidence(xs):
     """Return the JSON form of the confidence mean of the non-``None`` items.

     Entries of ``xs`` that are ``None`` are dropped; the remaining ones go
     through ``binomial_confidence_mean`` and the outcome is converted with
     ``confidence_value_to_json``.
     """
     present = [x for x in xs if x is not None]
     estimate = binomial_confidence_mean(present)
     return confidence_value_to_json(estimate)