# Every image is divided into a _GRID_SIZE x _GRID_SIZE grid of segments.
_GRID_SIZE = 6

# Skip-button categories tracked per task; each is matched against the
# free-form ``reason`` value stored with a task run.
_SKIP_REASONS = ('blank', 'qual', 'diff')


def _user_is_selected(user_id, user_ids_to_include, user_ids_to_exclude):
    """Return True when ``user_id`` passes both the include and the exclude filter."""
    if user_ids_to_include is not None and user_id not in user_ids_to_include:
        return False
    if user_ids_to_exclude is not None and user_id in user_ids_to_exclude:
        return False
    return True


def _new_skip_record():
    """Return an empty skip-usage record for a single task (image)."""
    record = {reason: {'id': [], 'reverse': []} for reason in _SKIP_REASONS}
    record['count_users'] = 0
    return record


def create_individual_dict_from_csv(project_short_name = None, user_ids_to_include = None, user_ids_to_exclude = None):
    """Build a dictionary with one entry per image segment from the project CSVs.

    Reads ``<project_short_name>_task.csv`` (images) and
    ``<project_short_name>_task_run.csv`` (classifications), creates one entry
    for each cell of the 6 x 6 grid of every task, and files each user's answer
    under the segment it refers to.

    Args:
        project_short_name: Project short name; used as the prefix of the two
            CSV file names and looked up in ``skip_versions`` to decide
            whether skip-button data is processed.
        user_ids_to_include: Optional collection of user ids. When given, only
            task runs by these users are used.
        user_ids_to_exclude: Optional collection of user ids to leave out,
            e.g. test user ids. When both filters are given a user must be in
            the include list AND absent from the exclude list.

    Returns:
        dict keyed by ``create_key(task_id, row, col)``. Each value is a dict
        with ``"task_id"``, ``"row"``, ``"col"``, ``"link"`` (the task's
        ``taskinfo__url_b``) and ``"user_answers"`` (user id ->
        ``{"answer": ...}``). For projects listed in ``skip_versions`` each
        value also has ``"skip_ans"``: the per-task skip record, shared by all
        segments of that task, holding ``count_users`` plus ``id`` / ``reverse``
        user-id lists for ``blank``, ``qual`` and ``diff``.

    Raises:
        TypeError: If ``project_short_name`` is None (the file names cannot be
            built).
        KeyError: If a task run refers to a task or segment that is not
            present in the task CSV.
    """
    # pull data on tasks (images) and task_runs (classifications)
    task_run_items = pd.read_csv(project_short_name + '_task_run.csv')
    tasks = pd.read_csv(project_short_name + '_task.csv')

    # skip-button information is only present for some project versions
    has_skip_button = project_short_name in skip_versions

    task_data = dict()
    # kept separate from task_data so the record is stored once per task
    # instead of once per segment while it is being filled
    skips = dict()

    # create an empty entry for every segment of every task
    for _, task in tasks.iterrows():
        task_id = task['task__id']
        for row in range(_GRID_SIZE):
            for col in range(_GRID_SIZE):
                key = create_key(task_id, row, col)
                # NOTE: an "img_num" entry was previously offered here for the
                # MATLAB visual representation; it is not populated.
                task_data[key] = {
                    "task_id": task_id,
                    "row": row,
                    "col": col,
                    "link": task["taskinfo__url_b"],
                    'user_answers': dict(),
                }
        skips[task_id] = _new_skip_record()

    # file every selected task run under the segments it answers
    for _, task_run in task_run_items.iterrows():
        user_id = task_run['task_run__user_id']
        if not _user_is_selected(user_id, user_ids_to_include, user_ids_to_exclude):
            continue

        task_id = task_run['task_run__task_id']
        # the answers are stored as a JSON string
        task_answers = json.loads(task_run['task_run__info'])
        segments = task_answers['squares']

        # a run is ignored only if the user skipped and did not change their mind
        skip_answers = None
        is_skipped = False
        if has_skip_button:
            skip_answers = task_answers['skipSlide']
            is_skipped = bool(skip_answers['reason']) and not skip_answers['changeOfMind']

        if not is_skipped:
            for seg in segments:
                key = create_key(task_id, seg['row'], seg["column"])
                # user_answers -> user id -> answer
                task_data[key]['user_answers'][user_id] = {"answer": seg['answer']}

        if has_skip_button:
            skip_record = skips[task_id]
            reason = skip_answers['reason']
            # reason is None when the user did not press skip
            if reason is not None:
                for category in _SKIP_REASONS:
                    if category in reason:
                        # users who skipped for this reason
                        skip_record[category]['id'].append(user_id)
                        if skip_answers['changeOfMind']:
                            # users who skipped and then changed their mind
                            skip_record[category]['reverse'].append(user_id)
            # counts how many (selected) users have seen the image
            skip_record['count_users'] += 1

    # attach the per-task skip record to each of the task's segments
    if has_skip_button:
        for segment in task_data.values():
            segment['skip_ans'] = skips[segment['task_id']]

    return task_data
import sys

# Export per-image skip-button statistics for project 'tb2-r1.2' to
# 'results_mvp2_1.2.csv': one row per task with, for each skip reason, the
# number of skips, the number of reversed skips and their difference.

# Build the per-segment dictionary first; without this call ``task_data`` is
# undefined when the script reaches the loop below.
# NOTE(review): pass user_ids_to_exclude here if test accounts must be left out.
task_data = create_individual_dict_from_csv('tb2-r1.2')
tasks = pd.read_csv('tb2-r1.2_task.csv')

# NOTE(review): the first column is labelled 'task_id' but is filled with the
# task's 'taskinfo__original_name'; the label is kept for existing consumers.
fieldnamz = ['task_id', 'qual_skip', 'q_reverse', 'q_total',
             'blank_skip', 'b_reverse', 'b_total',
             'diff_skip', 'd_reverse', 'd_total', 'count']

# The csv module needs a binary handle on Python 2 but a text handle opened
# with newline='' on Python 3 ('wb' raises TypeError there).
if sys.version_info[0] >= 3:
    open_kwargs = {'mode': 'w', 'newline': ''}
else:
    open_kwargs = {'mode': 'wb'}

with open('results_mvp2_1.2.csv', **open_kwargs) as f:
    # A single writer is used for the header and the rows so that every line
    # ends with the same terminator.
    w = csv.writer(f, lineterminator='\n')
    w.writerow(fieldnamz)

    for _, task in tasks.iterrows():
        # The skip record is shared by all segments of a task, so the
        # segment at row 0, col 0 is enough to reach it.
        key = create_key(task['task__id'], 0, 0)
        skip_ans = task_data[key]['skip_ans']

        dat = [task['taskinfo__original_name']]
        # column order: poor quality, blank, too difficult
        for reason in ('qual', 'blank', 'diff'):
            n_skipped = len(skip_ans[reason]['id'])
            n_reversed = len(skip_ans[reason]['reverse'])
            # skips where the user did not change their mind
            dat.extend([n_skipped, n_reversed, n_skipped - n_reversed])
        dat.append(skip_ans['count_users'])
        w.writerow(dat)