예제 #1
0
def save_exit_resolve_conflicts():
    """
    Save the current record-linkage page before the user leaves it.

    The ``user_data`` form field of the request is split on ';' and every
    non-empty entry is prefixed with ``uid:<username>,``. The result is
    parsed with ``ud.parse_user_data`` and then:
    1. the parsed answers are handed to
       ``storage_model.save_working_answers`` together with the redis
       client ``r``
    2. the KAPR value read from redis is written through
       ``storage_model.update_kapr_conflicts``

    Returns the plain-text acknowledgement "data saved.".
    """
    username = current_user.username
    pid = r.get(username + '_working_pid')
    assignment_id = pid + '-' + username

    # Tag each non-empty entry with the uid; every entry keeps its ';'.
    uid_prefix = 'uid:' + username + ','
    entries = request.form['user_data'].split(';')
    tagged = ''.join(uid_prefix + entry + ';' for entry in entries if entry)
    formatted_data = ud.parse_user_data(tagged)

    storage_model.save_working_answers(assignment_id, formatted_data, r)

    # Copy the KAPR value cached in redis over to the database.
    cached_kapr = r.get(assignment_id + '_KAPR')
    storage_model.update_kapr_conflicts(mongo=mongo,
                                        username=username,
                                        pid=pid,
                                        kapr=cached_kapr)

    return "data saved."
예제 #2
0
def save_data():
    """
    Store the answers posted from the working page and log the request.

    The ``user_data`` form field is split on ';'; every non-empty entry is
    prefixed with ``uid:<username>,`` before the combined string is parsed
    by ``ud.parse_user_data`` and stored with ``storage_model.save_answers``.
    The raw form value is then recorded through ``storage_model.mlog``.

    Returns the plain-text acknowledgement 'data_saved.'.
    """
    username = current_user.username
    pid = r.get(username + '_working_pid')
    assignment_id = pid + '-' + username

    raw_answers = request.form['user_data']
    uid_prefix = 'uid:' + username + ','
    tagged = ''.join(uid_prefix + entry + ';'
                     for entry in raw_answers.split(';') if entry)
    formatted_data = ud.parse_user_data(tagged)

    storage_model.save_answers(mongo, pid, username, formatted_data)

    # The log entry keeps the raw, untagged form value.
    storage_model.mlog(mongo=mongo,
                       data={
                           'username': username,
                           'timestamp': time.time(),
                           'url': '/save_data',
                           'pid': str(pid),
                           'assignment_id': str(assignment_id),
                           'log': raw_answers,
                       })
    return 'data_saved.'
예제 #3
0
def open_cell():
    """
    Open one cell for the current user and return the outcome as JSON.

    Query parameters read from the request:
        id1: cell id; it is split on '-', field 0 is used as the pair
            number and field 2 as the attribute number.
        mode: forwarded unchanged to ``dm.open_cell``.

    The KAPR limit is 100 when the redis flag ``<username>_working_pid_rc``
    equals '1' (resolve-conflict mode); otherwise it is the ``kapr_limit``
    of the user's assignment status. The result of ``dm.open_cell`` is
    logged through ``storage_model.mlog`` and returned via ``jsonify``.
    """
    user = current_user
    pid = r.get(user.username + '_working_pid')
    assignment_id = pid + '-' + user.username
    is_rc = r.get(user.username + '_working_pid_rc')

    # NOTE(review): this call passes the user object while the call below
    # passes user.username to the same helper -- confirm which is intended.
    pair_datafile = storage_model.get_project_pair_datafile(mongo=mongo,
                                                            user=user,
                                                            pid=pid)
    working_data = dm.DataPairList(
        data_pairs=dl.load_data_from_csv(pair_datafile))
    project_pairfile = storage_model.get_project_pair_datafile(
        mongo=mongo, user=user.username, pid=pid)
    full_project_pairs = storage_model.get_total_pairs_from_pairfile(
        project_pairfile)
    working_data.set_kapr_size(full_project_pairs)
    full_data = dl.load_data_from_csv(project_pairfile)

    id1 = request.args.get('id1')
    mode = request.args.get('mode')
    # Split once; field 0 is the pair number, field 2 the attribute number.
    id_fields = id1.split('-')
    pair_num = id_fields[0]
    attr_num = id_fields[2]

    if is_rc == '1':
        # Resolve-conflict mode uses a fixed limit of 100.
        kapr_limit = 100
    else:
        assignment_status = storage_model.get_assignment_status(
            mongo=mongo, username=user.username, pid=pid)
        kapr_limit = float(assignment_status['kapr_limit'])

    ret = dm.open_cell(assignment_id, full_data, working_data, pair_num,
                       attr_num, mode, r, kapr_limit)

    log_data = {
        'username': user.username,
        'timestamp': time.time(),
        'url': '/get_cell',
        'pid': str(pid),
        'assignment_id': str(assignment_id),
        'log': json.dumps(ret)
    }
    storage_model.mlog(mongo=mongo, data=log_data)

    return jsonify(ret)
예제 #4
0
def open_big_cell():
    """
    Open two cells at once for the current user and return a merged result.

    Query parameters read from the request:
        id1, id3: cell ids; each is split on '-', with field 0 used as the
            pair number and field 2 as the attribute number.
        mode: forwarded unchanged to both ``dm.open_cell`` calls.

    Both cells are opened under the ``kapr_limit`` of the user's assignment
    status. If the second open reports 'fail', its response is returned as
    is. Otherwise the values of both opens are merged into one payload,
    logged through ``storage_model.mlog`` and returned via ``jsonify``.
    """
    user = current_user
    pid = r.get(user.username + '_working_pid')
    assignment_id = pid + '-' + user.username

    pair_datafile = storage_model.get_pair_datafile(mongo=mongo,
                                                    user=user,
                                                    pid=pid)
    full_data = dl.load_data_from_csv(pair_datafile)
    working_data = dm.DataPairList(
        data_pairs=dl.load_data_from_csv(pair_datafile))
    assignment_status = storage_model.get_assignment_status(
        mongo=mongo, username=user.username, pid=pid)
    kapr_limit = float(assignment_status['kapr_limit'])

    # Only id1 and id3 are needed to locate the two cells.
    id1 = request.args.get('id1')
    id3 = request.args.get('id3')
    mode = request.args.get('mode')

    first_fields = id1.split('-')
    ret1 = dm.open_cell(assignment_id, full_data, working_data,
                        first_fields[0], first_fields[2], mode, r,
                        kapr_limit)
    second_fields = id3.split('-')
    ret2 = dm.open_cell(assignment_id, full_data, working_data,
                        second_fields[0], second_fields[2], mode, r,
                        kapr_limit)

    # NOTE(review): only the second result is checked for 'fail'; confirm
    # that a failed first open cannot be followed by a successful second.
    if ret2['result'] == 'fail':
        return jsonify(ret2)

    # Values come from both opens; the status fields come from the second.
    ret = {
        'value1': ret1['value1'],
        'value2': ret1['value2'],
        'value3': ret2['value1'],
        'value4': ret2['value2'],
        'id': ret1['id'],
        'mode': ret2['mode'],
        'KAPR': ret2['KAPR'],
        'result': ret2['result'],
        'new_delta': ret2['new_delta']
    }

    log_data = {
        'username': user.username,
        'timestamp': time.time(),
        'url': '/get_big_cell',
        'pid': str(pid),
        'assignment_id': str(assignment_id),
        'log': json.dumps(ret)
    }
    storage_model.mlog(mongo=mongo, data=log_data)

    return jsonify(ret)
예제 #5
0
def record_linkage_next(pid):
    """
    Advance the current user's assignment of project ``pid`` by one page.

    Steps:
    - return the not-found page when the user has no assignment for ``pid``
    - increase the page number and the pair index in the database
    - copy the KAPR value cached in redis to the database
    - when the project is completed, combine the results and either
      publish them (no conflicts detected) or create a resolve-conflict
      project, then flash a message and redirect to 'project'

    The redis working cache is deliberately not flushed here because the
    resolve-conflict step still needs it.
    TODO: flush these data when resolve conflict finished
    """
    username = current_user.username
    assignment_id = pid + '-' + username

    # Bail out early if this user has no such assignment.
    if not storage_model.get_assignment(
            mongo=mongo, username=username, pid=pid):
        return page_not_found('page_not_found')

    storage_model.increase_assignment_page(mongo=mongo,
                                           username=username,
                                           pid=pid)
    storage_model.increase_pair_idx(mongo=mongo, pid=pid, username=username)

    cached_kapr = r.get(assignment_id + '_KAPR')
    storage_model.update_kapr(mongo=mongo,
                              username=username,
                              pid=pid,
                              kapr=cached_kapr)

    if not storage_model.is_project_completed(mongo=mongo, pid=pid):
        return redirect(url_for('record_linkage', pid=pid))

    storage_model.combine_result(mongo, pid)
    # Results are written via ajax and may not be up to date yet. When
    # conflicts exist, the result is updated after they are resolved.
    conflict_indices = storage_model.detect_result_conflicts(mongo, pid)
    if len(conflict_indices) > 0:
        create_resolve_conflict_project(pid)
    else:
        storage_model.update_result(mongo=mongo, pid=pid)

    flask.flash('You have completed the project.', 'alert-success')
    return redirect(url_for('project'))
예제 #6
0
def resolve_conflicts2_next(pid):
    """
    Advance the resolve-conflict project of ``pid`` by one page.

    Steps:
    - return the not-found page when no conflict project exists for the
      current user and ``pid``
    - increase the conflict page / pair index in the database
    - copy the KAPR value cached in redis to the database
    - when the conflict project is completed, merge the resolved conflicts
      into the result, update the result, delete the conflict project,
      clear the working-page cache in redis, flash a message and redirect
      to 'project'

    Until completion the redis cache is kept because resolving conflicts
    still needs it.
    """
    username = current_user.username
    assignment_id = pid + '-' + username

    # Bail out early if there is no conflict project to work on.
    if not storage_model.get_conflict_project(
            mongo=mongo, username=username, pid=pid):
        return page_not_found('page_not_found')

    storage_model.increase_conflict_page_pairidx(mongo=mongo,
                                                 username=username,
                                                 pid=pid)

    cached_kapr = r.get(assignment_id + '_KAPR')
    storage_model.update_kapr_conflicts(mongo=mongo,
                                        username=username,
                                        pid=pid,
                                        kapr=cached_kapr)

    if not storage_model.is_conflict_project_completed(mongo=mongo, pid=pid):
        return redirect(url_for('resolve_conflicts2', pid=pid))

    # Combine resolve_conflict_result with result, then publish it.
    storage_model.update_resolve_conflicts(mongo, pid)
    storage_model.update_result(mongo=mongo, pid=pid)
    storage_model.delete_resolve_conflict(mongo, pid)
    # All conflicts are resolved, so the redis data can go now.
    storage_model.clear_working_page_cache(assignment_id, r)
    flask.flash('You have completed resolve conflicts of this project.',
                'alert-success')
    return redirect(url_for('project', pid=pid))
예제 #7
0
def save_data_resolve_conflicts():
    """
    Store the answers posted from a resolve-conflicts page.

    The ``user_data`` form field is split on ';'; every non-empty entry is
    prefixed with ``uid:<username>,`` before the combined string is parsed
    by ``ud.parse_user_data`` and passed to
    ``storage_model.save_resolve_conflicts``.

    Returns the plain-text acknowledgement 'data_saved.'.
    """
    username = current_user.username
    pid = r.get(username + '_working_pid')

    uid_prefix = 'uid:' + username + ','
    entries = request.form['user_data'].split(';')
    tagged = ''.join(uid_prefix + entry + ';' for entry in entries if entry)
    formatted_data = ud.parse_user_data(tagged)

    storage_model.save_resolve_conflicts(mongo, pid, username, formatted_data)

    return 'data_saved.'
예제 #8
0
def save_data():
    """
    Store the answers posted from the working page and cache them in redis.

    The ``user_data`` form field is split on ';'; every non-empty entry is
    prefixed with ``uid:<username>,`` before the combined string is parsed
    by ``ud.parse_user_data``. The parsed data is stored with
    ``storage_model.save_answers`` and appended to the redis key
    ``<assignment_id>_user_data``.

    Returns the plain-text acknowledgement 'data_saved.'.
    """
    username = current_user.username
    pid = r.get(username + '_working_pid')
    assignment_id = pid + '-' + username

    uid_prefix = 'uid:' + username + ','
    entries = request.form['user_data'].split(';')
    tagged = ''.join(uid_prefix + entry + ';' for entry in entries if entry)
    formatted_data = ud.parse_user_data(tagged)

    storage_model.save_answers(mongo, pid, username, formatted_data)

    r.append(assignment_id + '_user_data', formatted_data)
    return 'data_saved.'
예제 #9
0
def create_resolve_conflict_project(pid):
    """
    Create the resolve-conflict project for the finished project ``pid``.

    Steps:
    - detect the conflicting pair indices and group them by the blocks
      listed in the project's ``block_id``
    - reset the owner's KAPR value in redis to 0.0 and replay the cell
      opens via ``dm.batched_open_cell`` with a KAPR limit of 100
    - create an empty ``<owner>_<project_name>_conflict_result.csv`` under
      ``config.DATA_DIR/internal``
    - save the conflict-project document through
      ``storage_model.save_conflict_project``

    Args:
        pid: project id; it is concatenated with the owner name, so it is
            expected to be a string.

    Returns:
        The literal string 'block_id'.
    """
    # One fetch is enough: owner, block layout and name all come from it.
    project = storage_model.get_project_by_pid(mongo, pid)
    owner = project['owner']
    assignment_id = pid + '-' + owner
    KAPR_key = assignment_id + '_KAPR'

    # get pair_num of conflicts
    conflict_indices = storage_model.detect_result_conflicts(mongo, pid)

    # Arrange conflict pairs by block, skipping blocks without conflicts.
    conflicts = []
    for block in project['block_id']:
        cur_block = [idx for idx in conflict_indices if idx in block]
        if cur_block:
            conflicts.append(cur_block)

    # simulate open cells for those opened by assignees
    pair_datafile = storage_model.get_pair_datafile_by_owner(mongo=mongo,
                                                             owner=owner,
                                                             pid=pid)
    working_data = dm.DataPairList(
        data_pairs=dl.load_data_from_csv(pair_datafile),
        indices=conflict_indices)
    project_pairfile = storage_model.get_project_pair_datafile(mongo=mongo,
                                                               user=owner,
                                                               pid=pid)
    full_project_pairs = storage_model.get_total_pairs_from_pairfile(
        project_pairfile)
    working_data.set_kapr_size(full_project_pairs)
    full_data = dl.load_data_from_csv(project_pairfile)

    # Start from a zero KAPR before replaying the opens.
    r.set(KAPR_key, 0.0)

    ids_list = working_data.get_ids()
    # Pair up consecutive ids: (0, 1), (2, 3), ...
    ids = list(zip(ids_list[0::2], ids_list[1::2]))
    data_mode_list = storage_model.get_conflict_data_mode(
        pid, ids, mongo, r, assignment_id)
    dm.batched_open_cell(assignment_id,
                         full_data,
                         working_data,
                         ids,
                         data_mode_list,
                         r,
                         kapr_limit=100)

    current_kapr = r.get(KAPR_key)

    result_path = os.path.join(
        config.DATA_DIR, 'internal',
        owner + '_' + project['project_name'] + '_conflict_result.csv')
    # Create (or truncate) the result file; the handle is closed on exit.
    with open(result_path, 'w'):
        pass

    isfull = storage_model.has_full_assignee(mongo, pid)

    conflict_project = {
        'pid': pid,
        'project_name': project['project_name'],
        'pair_num': conflicts,
        'current_page': 0,
        'page_size': len(conflicts),
        'kapr_limit': 100,
        'current_kapr': current_kapr,
        'pair_idx': 0,
        'total_pairs': len(conflict_indices),
        'result_path': result_path,
        'isfull': isfull,
    }

    storage_model.save_conflict_project(mongo, conflict_project)

    return 'block_id'