def open_big_cell():
    user = current_user
    pid = r.get(user.username + '_working_pid')
    assignment_id = pid + '-' + user.username
    pair_datafile = storage_model.get_pair_datafile(mongo=mongo, user=user, pid=pid)
    full_data = dl.load_data_from_csv(pair_datafile)
    working_data = dm.DataPairList(
        data_pairs=dl.load_data_from_csv(pair_datafile))
    assignment_status = storage_model.get_assignment_status(
        mongo=mongo, username=user.username, pid=pid)
    kapr_limit = float(assignment_status['kapr_limit'])
    # cell ids arrive as '<pair index>-<...>-<attribute index>'; id2/id4 are
    # accepted but only id1/id3 are needed to locate the two cells
    id1 = request.args.get('id1')
    id2 = request.args.get('id2')
    id3 = request.args.get('id3')
    id4 = request.args.get('id4')
    mode = request.args.get('mode')
    pair_num1 = str(id1.split('-')[0])
    attr_num1 = str(id1.split('-')[2])
    ret1 = dm.open_cell(assignment_id, full_data, working_data, pair_num1,
                        attr_num1, mode, r, kapr_limit)
    if ret1['result'] == 'fail':
        # stop before opening the second cell if the first one was rejected
        return jsonify(ret1)
    pair_num2 = str(id3.split('-')[0])
    attr_num2 = str(id3.split('-')[2])
    ret2 = dm.open_cell(assignment_id, full_data, working_data, pair_num2,
                        attr_num2, mode, r, kapr_limit)
    if ret2['result'] == 'fail':
        return jsonify(ret2)
    ret = {
        'value1': ret1['value1'],
        'value2': ret1['value2'],
        'value3': ret2['value1'],
        'value4': ret2['value2'],
        'id': ret1['id'],
        'mode': ret2['mode'],
        'KAPR': ret2['KAPR'],
        'result': ret2['result'],
        'new_delta': ret2['new_delta']
    }
    log_data = {
        'username': user.username,
        'timestamp': time.time(),
        'url': '/get_big_cell',
        'pid': str(pid),
        'assignment_id': str(assignment_id),
        'log': json.dumps(ret)
    }
    storage_model.mlog(mongo=mongo, data=log_data)
    return jsonify(ret)
def resolve_conflicts(pid):
    user = current_user
    assignment_id = pid + '-' + user.username
    # project metadata is needed below for the page title
    project = storage_model.get_project_by_pid(mongo, pid)
    indices = storage_model.detect_result_conflicts(mongo, pid)
    pair_datafile = storage_model.get_pair_datafile(mongo=mongo, user=user, pid=pid)
    working_data = dm.DataPairList(
        data_pairs=dl.load_data_from_csv(pair_datafile), indices=indices)
    icons = working_data.get_icons()
    ids_list = working_data.get_ids()
    ids = list(zip(ids_list[0::2], ids_list[1::2]))
    pairs_formatted = working_data.get_data_display('full')
    data = list(zip(pairs_formatted[0::2], pairs_formatted[1::2]))
    ret_data = {
        'data': data,
        'icons': icons,
        'ids': ids,
        'title': project['project_name'],
        'this_url': '/record_linkage/' + pid,
        'next_url': '/project/' + pid,
        'pid': pid,
        'data_size': len(data),
    }
    return render_template('resolve_conflicts.html', data=ret_data)
def open_cell():
    user = current_user
    pid = r.get(user.username + '_working_pid')
    assignment_id = pid + '-' + user.username
    # '1' marks an open request made from the resolve-conflicts view
    is_rc = r.get(user.username + '_working_pid_rc')
    pair_datafile = storage_model.get_project_pair_datafile(mongo=mongo, user=user, pid=pid)
    working_data = dm.DataPairList(
        data_pairs=dl.load_data_from_csv(pair_datafile))
    project_pairfile = storage_model.get_project_pair_datafile(
        mongo=mongo, user=user.username, pid=pid)
    full_project_pairs = storage_model.get_total_pairs_from_pairfile(
        project_pairfile)
    working_data.set_kapr_size(full_project_pairs)
    full_data = dl.load_data_from_csv(project_pairfile)
    # cell id format: '<pair index>-<...>-<attribute index>'; id2 is accepted
    # but not needed here
    id1 = request.args.get('id1')
    id2 = request.args.get('id2')
    mode = request.args.get('mode')
    pair_num = str(id1.split('-')[0])
    attr_num = str(id1.split('-')[2])
    if is_rc == '1':
        # resolving conflicts: the privacy budget does not apply
        kapr_limit = 100
    else:
        assignment_status = storage_model.get_assignment_status(
            mongo=mongo, username=user.username, pid=pid)
        kapr_limit = float(assignment_status['kapr_limit'])
    ret = dm.open_cell(assignment_id, full_data, working_data, pair_num,
                       attr_num, mode, r, kapr_limit)
    log_data = {
        'username': user.username,
        'timestamp': time.time(),
        'url': '/get_cell',
        'pid': str(pid),
        'assignment_id': str(assignment_id),
        'log': json.dumps(ret)
    }
    storage_model.mlog(mongo=mongo, data=log_data)
    return jsonify(ret)
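# Sketch of the AJAX request this handler expects, inferred from the parsing
# above (the concrete path, values, and the meaning of the middle id token are
# assumptions, not taken from the routing code):
#
#   GET /get_cell?id1=4-x-2&id2=4-y-2&mode=full
#
# id1 is split on '-' and only parts [0] (pair index) and [2] (attribute
# index) are used; id2 presumably identifies the corresponding cell of the
# other record in the pair but is not needed to open the cell.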
def show_pratice_moderate_mode():
    pairs = dl.load_data_from_csv('data/practice_moderate_mode.csv')
    pairs_formatted = dd.format_data(pairs, 'moderate')
    data = zip(pairs_formatted[0::2], pairs_formatted[1::2])
    icons = dd.generate_icon(pairs)
    return render_template('record_linkage_d.html',
                           data=data,
                           icons=icons,
                           title='Section 1: practice',
                           thisurl='/practice/moderate_mode',
                           page_number=14)
def grade_pratice_full_mode(table_mode):
    data_file = 'practice_' + str(table_mode) + '.csv'
    ret = list()
    responses = request.args.get('response').split(',')
    pairs = dl.load_data_from_csv('data/' + data_file)
    j = 0
    all_correct = True
    # records come in pairs; column 17 holds the ground truth ('1' = match,
    # '0' = non-match) and column 18 the feedback shown for a wrong answer
    for i in range(0, len(pairs), 2):
        result = False
        j += 1
        q = 'q' + str(j)
        answer = pairs[i][17]
        if answer == '1' and (q + 'a4' in responses or q + 'a5' in responses
                              or q + 'a6' in responses):
            result = True
        if answer == '0' and (q + 'a1' in responses or q + 'a2' in responses
                              or q + 'a3' in responses):
            result = True
        if not result:
            ret.append('<div>' + pairs[i][18] + '</div>')
            all_correct = False
    if all_correct:
        ret.append('<div>Good job!</div>')
    return jsonify(result=ret)
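# Illustrative grading input, inferred from the checks above (the path and
# token values are made up; reading a1-a3 as the "non-match" choices and
# a4-a6 as the "match" choices is an assumption based on which ground-truth
# value they are compared against):
#
#   GET /grade_practice?response=q1a5,q2a2,q3a6
#
# For question j, pairs[2*(j-1)][17] holds the ground truth ('1' = match,
# '0' = non-match) and pairs[2*(j-1)][18] the feedback returned when the
# submitted choice disagrees with it.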
def record_linkage(pid):
    user = current_user
    # find if this project exists
    project = storage_model.get_assignment(mongo=mongo, username=user.username, pid=pid)
    if not project:
        return page_not_found('page_not_found')
    # username and project_id together identify an assignment
    assignment_id = pid + '-' + user.username
    # get assignment status
    assignment_status = storage_model.get_assignment_status(
        mongo=mongo, username=user.username, pid=pid)
    current_page = assignment_status['current_page']
    page_size = assignment_status['page_size']
    kapr_limit = assignment_status['kapr_limit']
    current_kapr = assignment_status['current_kapr']
    display_mode = assignment_status['display_mode']
    isfull = assignment_status.get('isfull', False)
    default_mode = 'B' if isfull == 'true' else 'M'
    if current_page >= page_size:
        flask.flash('You have completed the project.', 'alert-success')
        return redirect(url_for('project'))
    # get working data and full data
    pair_datafile = storage_model.get_pair_datafile(mongo=mongo, user=user, pid=pid)
    indices, pair_idx = storage_model.get_current_block(mongo=mongo,
                                                        pid=pid,
                                                        assignee=user.username)
    working_data = dm.DataPairList(
        data_pairs=dl.load_data_from_csv(pair_datafile), indices=indices)
    project_pairfile = storage_model.get_project_pair_datafile(
        mongo=mongo, user=user.username, pid=pid)
    full_project_pairs = storage_model.get_total_pairs_from_pairfile(
        project_pairfile)
    working_data.set_kapr_size(full_project_pairs)
    full_data = dl.load_data_from_csv(project_pairfile)
    # prepare return data
    icons = working_data.get_icons()
    ids_list = working_data.get_ids()
    ids = list(zip(ids_list[0::2], ids_list[1::2]))
    data_mode = display_mode.lower()
    data_mode_list = storage_model.get_data_mode(assignment_id,
                                                 ids,
                                                 r=r,
                                                 data_mode=data_mode,
                                                 default_mode=default_mode)
    pairs_formatted = working_data.get_data_display(data_mode, data_mode_list)
    data = list(zip(pairs_formatted[0::2], pairs_formatted[1::2]))
    pair_ids = indices
    record_ids = storage_model.get_record_id_by_pair_id(mongo, pid, indices)
    # compute the KAPR delta for each displayed pair, with all 11 attributes
    # initially masked
    delta = list()
    for i in range(working_data.size()):
        data_pair = working_data.get_data_pair_by_index(i)
        if data_pair is None:
            break
        delta += dm.KAPR_delta(full_data, data_pair, 11 * ['M'], len(full_data))
    # prepare cache data for ajax query
    r.set(user.username + '_working_pid', pid)
    r.set(user.username + '_working_pid_rc', 0)
    KAPR_key = assignment_id + '_KAPR'
    r.set(KAPR_key, float(current_kapr))
    # get saved working answers
    answers = storage_model.get_working_answers(assignment_id, r)
    ret_data = {
        'data': data,
        'data_mode_list': data_mode_list,
        'icons': icons,
        'ids': ids,
        'record_ids': record_ids,
        'pair_ids': pair_ids,
        'title': project['project_name'],
        'kapr': round(100 * float(current_kapr), 1),
        'kapr_limit': float(kapr_limit),
        'page_number': current_page + 1,
        'page_size': page_size,
        'pair_num_base': pair_idx + 1,
        'delta': delta,
        'this_url': '/record_linkage/' + pid,
        'saved_answers': answers,
        'data_size': len(data),
        'isfull': isfull,
    }
    return render_template('record_linkage_ppirl.html', data=ret_data)
def resolve_conflicts2(pid):
    user = current_user
    # find if this conflict project exists
    assignment = storage_model.get_conflict_project(mongo=mongo,
                                                    username=user.username,
                                                    pid=pid)
    if not assignment:
        return page_not_found('page_not_found')
    # username and project_id together identify an assignment
    assignment_id = pid + '-' + user.username
    # get assignment status
    current_page = assignment['current_page']
    page_size = int(assignment['page_size'])
    kapr_limit = assignment['kapr_limit']
    current_kapr = assignment['current_kapr']
    if current_page >= page_size:
        flask.flash('You have completed the project.', 'alert-success')
        return redirect(url_for('project'))
    # get working data and full data
    pair_datafile = storage_model.get_project_pair_datafile(mongo=mongo,
                                                            user=user.username,
                                                            pid=pid)
    pair_idx = assignment['pair_idx']
    indices = assignment['pair_num'][current_page]
    working_data = dm.DataPairList(
        data_pairs=dl.load_data_from_csv(pair_datafile), indices=indices)
    project_pairfile = storage_model.get_project_pair_datafile(
        mongo=mongo, user=user.username, pid=pid)
    full_project_pairs = storage_model.get_total_pairs_from_pairfile(
        project_pairfile)
    working_data.set_kapr_size(full_project_pairs)
    full_data = dl.load_data_from_csv(project_pairfile)
    isfull = assignment['isfull']
    # prepare return data
    icons = working_data.get_icons()
    ids_list = working_data.get_ids()
    ids = list(zip(ids_list[0::2], ids_list[1::2]))
    data_mode = 'masked'
    data_mode_list = storage_model.get_conflict_data_mode(
        pid, ids, mongo, r, assignment_id, isfull)
    pairs_formatted = working_data.get_data_display(data_mode, data_mode_list)
    data = list(zip(pairs_formatted[0::2], pairs_formatted[1::2]))
    record_ids = storage_model.get_record_id_by_pair_id(mongo, pid, indices)
    # compute the KAPR delta for each displayed pair, with all 11 attributes
    # initially masked
    delta = list()
    for i in range(working_data.size()):
        data_pair = working_data.get_data_pair_by_index(i)
        if data_pair is None:
            break
        delta += dm.KAPR_delta(full_data, data_pair, 11 * ['M'], len(full_data))
    # prepare cache data for ajax query
    r.set(user.username + '_working_pid', pid)
    r.set(user.username + '_working_pid_rc', 1)
    KAPR_key = assignment_id + '_KAPR'
    r.set(KAPR_key, float(current_kapr))
    # get saved working answers
    answers = storage_model.get_working_answers(assignment_id, r)
    # get the assignees' choices for each conflicting pair
    choices, choice_cnt = storage_model.get_users_choices(mongo=mongo,
                                                          pid=pid,
                                                          indices=indices)
    ret_data = {
        'data': data,
        'data_mode_list': data_mode_list,
        'icons': icons,
        'ids': ids,
        'pair_ids': indices,
        'record_ids': record_ids,
        'title': 'resolve conflicts',
        'kapr': round(100 * float(current_kapr), 1),
        'kapr_limit': kapr_limit,
        'page_number': current_page + 1,
        'page_size': page_size,
        'pair_num_base': pair_idx + 1,
        'delta': delta,
        'this_url': '/resolve_conflicts2/' + pid,
        'saved_answers': answers,
        'data_size': len(data),
        'choices': choices,
        'choice_cnt': choice_cnt,
        'isfull': isfull,
    }
    return render_template('resolve_conflicts2.html', data=ret_data)
def create_resolve_conflict_project(pid):
    project = storage_model.get_project_by_pid(mongo, pid)
    owner = project['owner']
    assignment_id = pid + '-' + owner
    # get pair_num of conflicts
    conflict_indices = storage_model.detect_result_conflicts(mongo, pid)
    # arrange conflict pairs by block, preserving the project's block order
    block_id = project['block_id']
    conflicts = list()
    for block in block_id:
        cur_block = list()
        for idx in conflict_indices:
            if idx in block:
                cur_block.append(idx)
        if cur_block:
            conflicts.append(cur_block)
    # simulate open cells for those opened by assignees
    pair_datafile = storage_model.get_pair_datafile_by_owner(mongo=mongo,
                                                             owner=owner,
                                                             pid=pid)
    working_data = dm.DataPairList(
        data_pairs=dl.load_data_from_csv(pair_datafile), indices=conflict_indices)
    project_pairfile = storage_model.get_project_pair_datafile(mongo=mongo,
                                                               user=owner,
                                                               pid=pid)
    full_project_pairs = storage_model.get_total_pairs_from_pairfile(
        project_pairfile)
    working_data.set_kapr_size(full_project_pairs)
    full_data = dl.load_data_from_csv(project_pairfile)
    KAPR_key = assignment_id + '_KAPR'
    r.set(KAPR_key, 0.0)
    ids_list = working_data.get_ids()
    ids = list(zip(ids_list[0::2], ids_list[1::2]))
    data_mode_list = storage_model.get_conflict_data_mode(
        pid, ids, mongo, r, assignment_id)
    dm.batched_open_cell(assignment_id, full_data, working_data, ids,
                         data_mode_list, r, kapr_limit=100)
    current_kapr = r.get(KAPR_key)
    result_path = os.path.join(
        config.DATA_DIR, 'internal',
        project['owner'] + '_' + project['project_name'] + '_conflict_result.csv')
    # create an empty result file
    open(result_path, 'w+').close()
    isfull = storage_model.has_full_assignee(mongo, pid)
    conflict_project = {
        'pid': pid,
        'project_name': project['project_name'],
        'pair_num': conflicts,
        'current_page': 0,
        'page_size': len(conflicts),
        'kapr_limit': 100,
        'current_kapr': current_kapr,
        'pair_idx': 0,
        'total_pairs': len(conflict_indices),
        'result_path': result_path,
        'isfull': isfull,
    }
    storage_model.save_conflict_project(mongo, conflict_project)
    return 'block_id'
import json
import logging
import math
import os
import time
from functools import wraps

import flask
import redis
from flask import Flask, jsonify, redirect, render_template, request, session, url_for

import config
import data_display as dd
import data_loader as dl
import data_model as dm

app = Flask(__name__)

if config.ENV == 'production':
    r = redis.from_url(os.environ.get("REDIS_URL"))
elif config.ENV == 'development':
    r = redis.Redis(host='localhost', port=6379, db=0)

# global data; this should be common across all users and not affected by
# running multiple processes
DATASET = dl.load_data_from_csv('data/section2.csv')
DATA_PAIR_LIST = dm.DataPairList(
    data_pairs=dl.load_data_from_csv('data/ppirl.csv'))


def state_machine(function_name):
    """Record the position of the named view in config.SEQUENCE in the
    session before the wrapped view runs."""
    def wrapper(f):
        @wraps(f)
        def inner_wrapper(*args, **kwargs):
            sequence = config.SEQUENCE
            for i in range(len(sequence)):
                if sequence[i] == function_name:
                    session['state'] = i
                    break
            return f(*args, **kwargs)
        return inner_wrapper
    return wrapper
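# Usage sketch for the state_machine decorator above (the route, view name,
# and SEQUENCE entry are hypothetical; a real view would render one of the
# app's templates):
#
# @app.route('/example_step')
# @state_machine('example_step')
# def example_step():
#     # session['state'] now holds the index of 'example_step' in
#     # config.SEQUENCE, so templates can show progress through the study
#     return render_template('example_step.html')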
# Module-level cache of loaded datasets (assumed to be defined at this point;
# it is populated by load_database_from_csv below).
databases = dict()


def load_database_from_csv(file_name, database_name):
    data = dl.load_data_from_csv(file_name)
    databases[database_name] = data
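# Minimal usage sketch (hypothetical file and database names):
#
# load_database_from_csv('data/section2.csv', 'section2')
# rows = databases['section2']  # the parsed rows, as returned by dl.load_data_from_csv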