Beispiel #1
0
def __regenerate_models():
    """Reset every stored user model, then replay all ideas to rebuild them."""
    # Wipe the persisted model state for every user_model record
    for user_row in db(db.user_model.id > 0).select():
        user_row.last_cat = None
        user_row.count_pair = 0
        user_row.num_ideas = 0
        user_row.count_transition_pairs = 0
        user_row.transition_graph = '[]'
        user_row.category_matrix = '{}'
        user_row.update_record()
    # Replay every idea through its owner's model, in order
    for idea_row in db(db.idea.id > 0).select():
        # Names of the tags attached to this idea
        tag_rows = db((db.tag_idea.idea == idea_row.id)
                      & (db.tag.id == db.tag_idea.tag)).select(
                          db.tag.tag, groupby=db.tag_idea.id)
        tag_names = [row.tag for row in tag_rows]
        # Feed the tags into the owner's model for this problem
        model = user_models.UserModel(idea_row.userId, idea_row.problem)
        model.update(tag_names)
Beispiel #2
0
def download_ideas():
    """Export every idea of a problem as a CSV download (auth required)."""
    __check_auth()
    problem_id = long(request.vars['problem'])
    # CSV header; must stay aligned with the record layout below
    fields = [
        'user', 'condition', 'id', 'idea', 'timestamp', 'origin', 'sources',
        'tags'
    ]
    records = []
    for row in db(db.idea.problem == problem_id).select():
        # Condition comes from the author's model, tags from the join table
        model = user_models.UserModel(row.userId, problem_id)
        tag_rows = db((db.tag_idea.idea == row.id)
                      & (db.tag.id == db.tag_idea.tag)).select()
        tag_names = [t.tag.tag for t in tag_rows]
        records.append([
            row.userId,
            model.user_condition,  # condition
            row.id,
            row.idea,
            row.dateAdded,
            row.origin,
            row.sources,
            '|'.join(tag_names),  # tags
        ])
    # Build the response
    filename = 'ideas_%d_%s.csv' % (problem_id, datetime.date.today())
    return __prepare_csv_response(fields, records, filename, problem_id)
Beispiel #3
0
def get_inferred_categories(user_id, problem_id, db):
    '''
    Infer categories for a user from the 5 nearest-neighbor user models.

    I intended this class to not have to access DB at all, but right now
    I can't see a better way to do this.

    :param user_id: id of the user whose categories are being inferred
    :param problem_id: id of the problem the models belong to
    :param db: DAL connection used to look up other users with pooled ideas
    :return: categories inferred from the nearest neighbors
    '''
    user_model = user_models.UserModel(user_id, problem_id)
    # All other users with pooled ideas in this problem.
    # FIX: replaced the deprecated Python 2 `<>` operator with the
    # equivalent `!=` (removed in Python 3; PEP 401 era deprecation).
    all_users = db((db.idea.pool == True) & (db.idea.problem == problem_id)
                   & (db.idea.userId == db.user_info.id)
                   & (db.idea.userId != user_id)).select(db.idea.userId,
                                                         db.user_info.userId,
                                                         distinct=True)
    models = [
        user_models.UserModel(u.idea.userId, problem_id) for u in all_users
    ]
    nearest_neighbors = find_n_nearest(user_model, models, 5, db)
    # Infer categories from nearest neighbors
    inferred = infer_categories(user_model, nearest_neighbors, True)
    return inferred
Beispiel #4
0
def usermodel():
    """Stats view: one user's model, nearest neighbors, inferred categories."""
    __check_auth()
    user_id = request.vars['user']
    problem_id = request.vars['problem']
    if not (user_id and problem_id):
        redirect(URL('stats', 'index'))
    # Contextual records; bail out if either does not exist
    user = db(db.user_info.id == user_id).select().first()
    problem = db(db.problem.id == problem_id).select().first()
    if not (user and problem):
        redirect(URL('stats', 'index'))
    # Model of the user under inspection
    user_model = user_models.UserModel(user.id, problem.id)
    # Models of every other user in this problem (excluding this one)
    peers = __get_users_in_problem(problem_id, [user_id])
    models = [user_models.UserModel(p.idea.userId, problem_id) for p in peers]
    nearest_neighbors = collab_filter.find_n_nearest(user_model, models, 5, db)
    # Categories inferred from the nearest neighbors
    inferred = collab_filter.infer_categories(user_model, nearest_neighbors,
                                              True)
    # This user's action log for the problem
    logs = db((db.action_log.problem == problem_id)
              & (db.action_log.userId == user_id)).select()
    adjacent = user_model.transition_graph.get_adjacent(user_model.last_cat)
    return dict(user=user,
                problem=problem,
                model=user_model,
                nearest_neighbors=nearest_neighbors,
                inferred=inferred,
                inferred_json=json.dumps(inferred),
                logs=logs,
                adjacent=', '.join(__handle_unicode(adjacent)),
                inspiration_cats=', '.join(
                    __handle_unicode(
                        user_model.get_inspiration_categories(3))),
                ordered_tags=', '.join(
                    __handle_unicode(user_model.get_ordered_tags())))
Beispiel #5
0
def __get_data(problem_id):
    """Build (fields, records): per-user statistics rows for one problem.

    Returns the CSV header list and one record per user who has ideas in
    the given problem, combining counts from the action log, the idea
    table and each user's model. The record layout must stay aligned
    with `fields`.
    """
    users = __get_users_in_problem(problem_id)
    # Define fields (CSV header — keep in sync with user_record below)
    fields = [
        'problem_id',
        'user_id',
        'public_id',
        'initial_login',
        'condition',
        'num_ideas',
        'num_refined',
        'num_combined',
        'num_inspirations',
        'num_clicks_sp',
        'breadth',
        'depth_avg',
        'depth_max',
        'breadth_1',
        'breadth_2',
        'avg_click_i',
        'sp_fulfill',
        'insp_fulfill',
        'category_switch_ratio',
        'model_url',
    ]
    # TODO get data
    records = []
    for u in users:
        # Contextual user info: public id and first-login timestamp
        user = db(db.user_info.id == u.idea.userId).select().first()
        public_id = user.userId
        initial_login = user.initialLogin
        model = user_models.UserModel(u.idea.userId, problem_id)
        # Get the number of inspiration requests
        num_inspirations = db(
            (db.action_log.problem == problem_id)
            & (db.action_log.userId == u.idea.userId)
            & (db.action_log.actionName == 'get_available_tasks')).count()

        # Get the number of clicks on a solution space cell
        # (filter in Python because 'tags' lives inside extraInfo text)
        num_clicks_sp = db((db.action_log.problem == problem_id)
                           & (db.action_log.userId == u.idea.userId)
                           & (db.action_log.actionName == 'get_ideas')).select(
                           ).find(lambda r: 'tags' in r.extraInfo)

        # Get the number of refined and combined ideas
        refined = db((db.idea.problem == problem_id)
                     & (db.idea.userId == u.idea.userId)
                     & (db.idea.origin == 'refinement')).count()
        combined = db((db.idea.problem == problem_id)
                      & (db.idea.userId == u.idea.userId)
                      & (db.idea.origin == 'combine')).count()

        # Breadth in each half of the session
        breadth_1, breadth_2 = __get_breadth_per_half(user, problem_id)

        # Get fulfillment and related metrics
        fulfill = __get_fulfillment_metrics(user, problem_id)

        # Create record — order must match `fields`
        user_record = [
            problem_id,
            u.idea.userId,
            public_id,
            initial_login,
            model.user_condition,
            model.get_num_ideas(),
            refined,
            combined,
            num_inspirations,
            len(num_clicks_sp),
            model.get_breadth(),
            model.get_depth_avg(),
            model.get_depth_max(),
            breadth_1,
            breadth_2,
            fulfill['avg_click_i'],
            fulfill['sp_fulfill'],
            fulfill['insp_fulfill'],
            model.category_switch_ratio,
            'http://' + request.env.http_host +
            URL('stats', 'usermodel?problem=%d&user=%d' %
                (problem_id, u.idea.userId)),
        ]
        # Add to array
        records.append(user_record)

    return fields, records
Beispiel #6
0
def __update_user_model(user_id, problem_id, tags, tag_names):
    """Resolve tag ids to names via tag_names and feed them to the model."""
    model = user_models.UserModel(user_id, problem_id)
    model.update([tag_names[tag_id] for tag_id in tags])
Beispiel #7
0
def __get_tasks(user_id, problem_id):
    """Pick up to NUM_TASKS rating tasks for a user, driven by the user model.

    Selects inspiration categories from the model, pads with random tags
    when too few, and for each category finds an uncompleted task on a
    pooled idea by another user. Failed lookups are skipped silently.
    """
    # Get tags
    model = user_models.UserModel(user_id, problem_id)
    inspiration_categories, all_inferred = model.get_inspiration_categories(
        NUM_TASKS)
    # Remove duplicates (set() loses order) then append inferred ones
    inspiration_categories = list(set(inspiration_categories))
    inspiration_categories.extend(all_inferred)

    # Get idea ids that the user already has used as inspiration
    completed_tasks = db((db.task.completed_by == user_id)
                         & (db.task.task_type == 'RatingTask')
                         & (db.task.problem == problem_id)).select(
                             db.task.idea)
    completed_tasks = [row.idea for row in completed_tasks]

    if len(inspiration_categories) < NUM_TASKS:
        # There were not enough inspiration categories. Randomly add more
        tags = [
            t.tag for t in db(db.tag.problem == problem_id).select(
                orderby='<random>')
        ]
        inspiration_categories.extend(tags)

    tasks = []
    for t in inspiration_categories:
        try:
            # Get tag id
            t_id = db((db.tag.problem == problem_id)
                      & (db.tag.tag == t)).select(db.tag.id).first().id
            # Get a random uncompleted idea with this tag, authored by
            # someone else and in the pool
            if t_id:
                idea_id = db((db.tag_idea.tag == t_id)
                             & (~db.tag_idea.idea.belongs(completed_tasks))
                             & (db.tag_idea.idea == db.idea.id)
                             & (db.idea.userId != user_id)
                             & (db.idea.pool == True)).select(
                                 db.tag_idea.idea,
                                 orderby='<random>').first().idea
                if idea_id:
                    # Get a task for this idea and add it to list
                    task = db((db.task.idea == idea_id)
                              & (db.task.idea == db.idea.id)
                              & (db.task.idea == db.tag_idea.idea) &
                              (db.tag_idea.tag == db.tag.id)).select().first()
                    if task:
                        tasks.append(task)
        except Exception:
            # Deliberate best-effort: .first() may return None (AttributeError
            # on .id/.idea) when nothing matches — just try the next category
            pass
        if len(tasks) == NUM_TASKS:
            break

    # Flag tasks whose idea the user has favorited
    favorites = __get_favorites(user_id)
    for t in tasks:
        if t.idea.id in favorites:
            t.idea.favorite = True
        else:
            t.idea.favorite = False

    # Filter to max number of tasks and return
    return tasks
Beispiel #8
0
def get_versioning_structure():
    '''
    Return the tree structure for the versioning panel as a JSON string.
    Structure:
    [
        {
            id:31,
            type: 'combination',
            tags: ['tag1', 'tag2'],
            children: [
                {
                    id: 01,
                    ...,
                    children: ...
                },
                {
                    id: 91,
                    ...,
                    children: ...
                }
            ]
        }
    ]
    '''
    problem_id = util.get_problem_id(request)
    user_id = session.user_id
    cache_type = 'versioning'
    # datetime.min means "no cache": every idea is newer than it
    timestamp = datetime.datetime.min
    complete_idea_map = dict()  # idea id (str) -> Node, incl. replaced ideas
    filtered_idea_list = []  # only ideas not replaced by a newer version
    ids = []  # ids of the ideas in filtered_idea_list

    # Retrieve latest cache
    '''
    TODO enable cache again
    Cache is temporarily disabled due to a bug where refinements (and probably mergers) wouldn't be accurately reflected in the updated model.
    E.g. if an idea was refined, the verisoning view would still show the pre-refined idea as the latest version, alongside with the refined idea.
    The solution should involve updating the cached model when you detect a new idea that's the result of of merge or refinement
    '''
    cache = None  # db((db.visualization_cache.problem == problem_id) & (db.visualization_cache.type == cache_type)).select().first()
    if cache:
        # Resume from the cached structures; only ideas newer than the
        # cache timestamp are processed below
        json_cache = json.loads(cache.cache, cls=ClassDecoder)
        timestamp = cache.timestamp
        complete_idea_map = json_cache['complete_idea_map']
        filtered_idea_list = json_cache['filtered_idea_list']
        ids = json_cache['ids']

    # Get all (new) ideas of the problem with their tags
    query = ((db.idea.id == db.tag_idea.idea) & (db.tag.id == db.tag_idea.tag)
             & (db.idea.problem == problem_id) &
             (db.idea.dateAdded > timestamp))
    results = db(query).select(
        orderby=db.idea.dateAdded,
        groupby=db.idea.id)  # Ordered from older to newer

    # Get user model and ordered tag sequence (used for final sorting)
    model = user_models.UserModel(user_id, problem_id)
    ordered_tags = model.get_ordered_tags()

    # Build idea map; older-first order guarantees sources already exist
    for i, r in enumerate(results):
        tags = [t.tag.tag for t in r.idea.tag_idea.select()]
        complete_idea_map[str(r.idea.id)] = Node(tags=tags,
                                                 type=r.idea.origin,
                                                 id=r.idea.id,
                                                 i=i)
        children = []
        if r.idea.sources:
            # Link this idea to the earlier ideas it was derived from
            children = [complete_idea_map[str(c)] for c in r.idea.sources]
            # TODO update previous ideas to reflect their replacement
        complete_idea_map[str(r.idea.id)].children = children
        if not r.idea.replacedBy:
            # Only latest versions appear at the top level of the panel
            filtered_idea_list.append(complete_idea_map[str(r.idea.id)])
            ids.append(r.idea.id)

    # Trim nodes to avoid redundancy
    for i in filtered_idea_list:
        __trim_node(i, ids)

    # Update cache (written even though reading is disabled above)
    key = (db.visualization_cache.problem
           == problem_id) & (db.visualization_cache.type == cache_type)
    db.visualization_cache.update_or_insert(
        key,
        problem=problem_id,
        type=cache_type,
        cache=json.dumps(dict(complete_idea_map=complete_idea_map,
                              filtered_idea_list=filtered_idea_list,
                              ids=ids),
                         cls=ClassEncoder),
        timestamp=datetime.datetime.now())

    # Sort ideas: by the user's tag ordering first, insertion order second
    filtered_idea_list.sort(key=lambda idea: (min(
        [ordered_tags.index(t) for t in idea.tags]), idea.i))

    # Log
    log_action(user_id, problem_id, 'get_versioning_structure',
               {'cache': (cache != None)})

    # return
    response.headers['Content-Type'] = 'application/json'
    return json.dumps(filtered_idea_list, cls=ClassEncoder)
Beispiel #9
0
def get_solution_space():
    """Return the solution-space grid (tags x connections) as a JSON string.

    Merges the cached connection counts with ideas added since the cache
    timestamp, orders tags by the user's model, and appends any tags the
    model doesn't know about yet.
    """
    user_id = session.user_id
    problem_id = util.get_problem_id(request)
    user_model = user_models.UserModel(user_id, problem_id)
    connections = dict()  # 'tag1|tag2|...' -> {tags: [...], n: count}
    timestamp = datetime.datetime.min
    max_n = 0  # largest cell count, used for scaling in the UI

    # Retrieve latest cache
    # TODO Cache is temporarily disabled since I change the handling of an idea's pool flag to run the studies. That means each user will have their own cache.
    cache = __get_cache(problem_id)
    if not cache:
        # If no cache, build it
        cache = __build_cache(problem_id, user_model)
    # Get info from cache
    json_cache = json.loads(cache.cache)
    connections = json_cache['connections']
    all_tags = json_cache['tags']
    timestamp = cache.timestamp
    max_n = json_cache['max_n']

    # Get tags ordered by the user model
    tags = user_model.get_ordered_tags()

    # Get the problem's ideas with their respective tags
    # NOTE(review): unlike the cache, this query does not filter by
    # timestamp, so cached ideas appear to be re-counted — confirm
    ideas = db((db.idea.id == db.tag_idea.idea)
               & (db.tag.id == db.tag_idea.tag)
               & (db.idea.problem == problem_id)).select(orderby=~db.idea.id,
                                                         groupby=db.idea.id)

    # Count ideas per tag combination
    for idea in ideas:
        idea_tags = list()
        for tag in idea.idea.tag_idea.select():
            tag = tag.tag.tag.lower()  # tags are compared case-insensitively
            idea_tags.append(tag)
            all_tags.append(tag)
        idea_tags.sort()  # this contains a sorted array of tags for idea
        # The sorted, joined tag list is the cell key in the grid
        key = '|'.join(idea_tags)
        if key not in connections.keys():
            connections[key] = dict(tags=idea_tags, n=0)
        n = connections[key]['n'] + 1
        connections[key]['n'] = n
        if n > max_n:
            max_n = n
    # tags = tags[:SOLUTION_SPACE_MAX_TAGS]

    # since another user may have added another tag, and since "tags" holds ALL tags, we need to remove those that are not in "ideas"
    final_tags = tags  # start with ordered array of tags (alias, not a copy)
    for t in all_tags:  # Go over all tags
        if t not in final_tags:  # If not already in the array, add it
            final_tags.append(t)

    # Create minimap overview and generate outcome dict
    overview = __generate_birdseye_solutionspace(final_tags,
                                                 connections,
                                                 max_n=max_n)
    outcome = json.dumps(
        dict(tags=final_tags,
             connections=connections,
             max_n=max_n,
             overview=overview))

    # Log
    log_action(user_id, problem_id, 'get_solution_space', {
        'cache': (cache != None),
        'tags': final_tags
    })

    return outcome
def __update_user_model(user_id, problem_id, tags):
    """Feed the given tag names into the user's model for this problem."""
    user_models.UserModel(user_id, problem_id).update(tags)