import pandas as pd
from unipath import Path

from __init__ import unfold, survey_dir, csv_output_dir

# Build a flat table of survey responses joined to their question metadata.
responses = pd.read_json(Path(survey_dir, 'responses.json'))

# unfold() is a project helper; after these calls the frame has 'selection'
# and 'question' columns (presumably lifted out of the nested 'fields'
# column -- confirm against its definition).
for response_field in ['selection', 'question']:
    responses = unfold(responses, response_field)
del responses['fields']

# Rename by assignment instead of inplace=True: the column subset is a
# derived frame, and renaming it in place can trigger pandas'
# SettingWithCopyWarning.
responses = responses[['selection', 'question']].rename(
    columns={'question': 'question_id'}
)

questions = pd.read_csv(Path(survey_dir, 'questions.csv'))

# No `on=` is given, so merge() joins on every column name the two frames
# share, using the default inner join.
responses = responses.merge(questions)

# Keep only the output columns, in their final order.
responses = responses[[
    'survey_id',
    'survey_label',
    'question_id',
    'message_id',
    'generation',
    'game_name',
    'chain_name',
    'answer',
    'selection',
]]
import pandas as pd
from unipath import Path

from __init__ import unfold, survey_dir, csv_output_dir

# Build a flat table of survey responses joined to their question metadata
# and write it to responses.csv in the CSV output directory.
responses = pd.read_json(Path(survey_dir, 'responses.json'))

# unfold() is a project helper; after these calls the frame has 'selection'
# and 'question' columns (presumably lifted out of the nested 'fields'
# column -- confirm against its definition).
for response_field in ['selection', 'question']:
    responses = unfold(responses, response_field)
del responses['fields']

# Rename by assignment instead of inplace=True: the column subset is a
# derived frame, and renaming it in place can trigger pandas'
# SettingWithCopyWarning.
responses = responses[['selection', 'question']].rename(
    columns={'question': 'question_id'}
)

questions = pd.read_csv(Path(survey_dir, 'questions.csv'))

# No `on=` is given, so merge() joins on every column name the two frames
# share, using the default inner join.
responses = responses.merge(questions)

# Keep only the output columns, in their final order.
responses = responses[[
    'survey_id',
    'survey_label',
    'question_id',
    'message_id',
    'generation',
    'game_name',
    'chain_name',
    'answer',
    'selection',
]]

responses.to_csv(Path(csv_output_dir, 'responses.csv'), index=False)
import pandas as pd from unipath import Path from __init__ import unfold, survey_dir messages = pd.read_json(Path(survey_dir, 'messages.json')) del messages['model'] for message_field in [ 'generation', 'num_children', 'audio', 'chain', 'parent' ]: messages = unfold(messages, message_field) del messages['fields'] def extract_from_path(frame, path_col, name, index): frame[name] = frame[path_col].str.split('/').str.get(index) return frame for i, name in enumerate(['game_name', 'chain_name', 'message_name']): messages = extract_from_path(messages, 'audio', name, i) messages = messages.ix[messages.game_name != 'test-game'] messages = messages.sort(['game_name', 'chain_name', 'message_name']) messages = messages.rename(columns={ 'pk': 'message_id',
import pandas as pd
from unipath import Path

from __init__ import unfold, survey_dir

# Flatten questions.json into one row per question.
questions = pd.read_json(Path(survey_dir, 'questions.json'))

# unfold() is a project helper; it is called once per field name and its
# result replaces the frame (presumably it lifts that key out of the nested
# 'fields' column -- confirm against its definition).
for question_field in ['choices', 'given', 'survey', 'answer']:
    questions = unfold(questions, question_field)
del questions['fields']
del questions['model']
del questions['choices']

questions.rename(
    columns={
        'pk': 'question_id',
        'survey': 'survey_id',
        'given': 'message_id',
    },
    inplace=True,
)

# Human-readable label for each known survey id.
survey_info = pd.DataFrame({
    'survey_id': [1, 2, 3, 4, 12, 13],
    'survey_label': [
        'between',
        'within',
        'between',
        'within',
        'between-splish',
        'within-splish',
    ],
})

# No `on=` is given, so merge() joins on the columns the two frames share,
# using the default inner join: questions whose survey_id is not listed in
# survey_info are dropped.
questions = questions.merge(survey_info)

messages = pd.read_csv(Path(survey_dir, 'messages.csv'))


def pick_ancestor(message):
    """Follow ``parent_id`` links upward until a root message id is reached.

    Looks ``message`` up in the module-level ``messages`` frame and recurses
    on its ``parent_id`` until the id is one of the hard-coded root ids.

    Args:
        message: A message id; either a root id or a value present in
            ``messages.message_id``.

    Returns:
        The root message id (one of 1-4 or 138-141) that ``message``
        descends from; ``message`` itself if it is already a root.

    Raises:
        ValueError: If ``message`` does not match exactly one row of
            ``messages``, or if the chain ends at a missing parent before
            reaching a root id.
    """
    if message in [1, 2, 3, 4] + [138, 139, 140, 141]:
        return message

    # `.ix` was removed in pandas 1.0; `.loc` does the same label/boolean
    # selection here.
    parent = messages.loc[messages.message_id == message, 'parent_id']

    # int(Series) only ever worked for exactly one row and failed with an
    # opaque TypeError otherwise, so check the row count explicitly.
    if len(parent) != 1:
        raise ValueError(
            'expected exactly one row for message_id %r, found %d'
            % (message, len(parent))
        )

    parent_id = parent.iloc[0]
    if pd.isnull(parent_id):
        raise ValueError(
            'message_id %r has no parent and is not a known root' % (message,)
        )
    return pick_ancestor(int(parent_id))
import pandas as pd
from unipath import Path

from __init__ import unfold, survey_dir

# Flatten messages.json into messages.csv, one row per message.
messages = pd.read_json(Path(survey_dir, 'messages.json'))
del messages['model']

# unfold() is a project helper; it is called once per field name and its
# result replaces the frame (presumably it lifts that key out of the nested
# 'fields' column -- confirm against its definition).
for message_field in [
    'generation',
    'num_children',
    'audio',
    'chain',
    'parent',
]:
    messages = unfold(messages, message_field)
del messages['fields']


def extract_from_path(frame, path_col, name, index):
    """Store one '/'-separated component of a path column in a new column.

    Args:
        frame: DataFrame to modify; the new column is added in place.
        path_col: Name of the string column holding '/'-separated paths.
        name: Name of the column to create (or overwrite).
        index: Position of the path component to extract.

    Returns:
        The same ``frame`` object, with column ``name`` set.
    """
    frame[name] = frame[path_col].str.split('/').str.get(index)
    return frame


# The first three components of the 'audio' path become the game, chain and
# message names respectively.
for i, name in enumerate(['game_name', 'chain_name', 'message_name']):
    messages = extract_from_path(messages, 'audio', name, i)

# `.ix` was removed in pandas 1.0; `.loc` with a boolean mask selects the
# same rows.
messages = messages.loc[messages.game_name != 'test-game']

# `DataFrame.sort` was removed in pandas 0.20; `sort_values` is its
# replacement and takes the same list of column names.
messages = messages.sort_values(['game_name', 'chain_name', 'message_name'])

messages = messages.rename(columns={
    'pk': 'message_id',
    'chain': 'chain_id',
    'parent': 'parent_id',
})

messages.to_csv(Path(survey_dir, 'messages.csv'), index=False)