def export_surveydata():
    """Collect survey rows for every session configured as "duopoly_rep_treat".

    Sessions are visited ordered by ``code``. Within a session only the
    subsessions whose app config name is ``"survey"`` are used, and their
    players are ordered by ``participant.id_in_session``.

    :return: tuple ``(headers, body)``; ``body`` holds one list per player
        (session values + subsession values + participant values + player
        values) and ``headers`` is the list of column names for those values.
    """
    (session_fns, session_fns_d, _group_fns, _group_fns_d,
     participant_fns, participant_fns_d) = get_headers_simple()

    # The subsession/player field names are looked up lazily from the first
    # survey subsession / player encountered.
    subsession_fns = []
    player_fns = []
    body = []

    for session in Session.objects.order_by("code"):
        if session.config["name"] != "duopoly_rep_treat":
            continue

        session_list = list_from_obj(session_fns, session)

        for subsession in session.get_subsessions():
            if subsession.__class__._meta.app_config.name != "survey":
                continue

            if not subsession_fns:
                subsession_fns = get_field_names_for_csv(subsession.__class__)
            subsession_list = list_from_obj(subsession_fns, subsession)

            players = list(subsession.get_players())
            players.sort(key=lambda pl: pl.participant.id_in_session)

            for player in players:
                if not player_fns:
                    player_fns = get_field_names_for_csv(player.__class__)

                participant_list = list_from_obj(participant_fns,
                                                 player.participant)
                player_list = list_from_obj(player_fns, player)
                body.append(session_list + subsession_list
                            + participant_list + player_list)

    headers = session_fns_d + subsession_fns + participant_fns_d + player_fns
    return headers, body
def export_marketdata():
    """Collect one market row per group for every "duopoly_rep_treat" session.

    :return: tuple ``(headers, body)``; each entry of ``body`` is the
        concatenation of the session values, the subsession values
        (``round_number``, ``realround``, ``treatment``), the group values
        and the market row produced by ``get_market_row``.
    """
    maxdim = max(Constants.treatmentdims)

    # Only the session field names of this call are used below; the group
    # field names are taken from the Group model instead.
    (session_fns, session_fns_d, _group_fns, _group_fns_d,
     _participant_fns, _participant_fns_d) = get_headers_simple()

    subsession_fns = ["round_number", "realround", "treatment"]
    group_fns = get_field_names_for_csv(Group)
    market_fns = get_market_headers(maxdim)
    headers = session_fns_d + subsession_fns + group_fns + market_fns

    body = []
    for session in Session.objects.order_by("code"):
        if session.config["name"] != "duopoly_rep_treat":
            continue

        session_list = list_from_obj(session_fns, session)

        # Original author's note: querying this app's Subsession model is
        # believed to exclude subsessions of other apps, so there is no
        # filter on the app name here.
        for subsession in Subsession.objects.filter(session=session):
            subsession_list = list_from_obj(subsession_fns, subsession)
            body.extend(
                session_list
                + subsession_list
                + list_from_obj(group_fns, group)
                + get_market_row(group, subsession.dims, maxdim)
                for group in subsession.get_groups()
            )

    return headers, body
def get_hierarchical_data_for_app(cls, app_name, return_columns=False):
    """
    Generate hierarchical structured data for app `app_name`, optionally returning flattened field names.

    The result is a list with one ordered dict per session. Children are nested under keys that start
    with two underscores: ``__subsession`` -> ``__group`` -> ``__player`` -> ``__participant``. Rows of
    custom models are nested under ``__<lowercased custom model name>`` inside the standard model row
    they are linked to.

    :param app_name: name of the app; its models module is loaded with `get_models_module`
    :param return_columns: if True, additionally return the flattened column names
    :return: the nested data, or a tuple ``(nested data, column names)`` if `return_columns` is True;
             column names have the form ``<model name>.<field name>`` and are listed in the order in
             which the models were first encountered while building the nested data
    """
    models_module = get_models_module(app_name)

    # get the standard models
    Player = models_module.Player
    Group = models_module.Group
    Subsession = models_module.Subsession

    # get the custom models configuration
    custom_models_conf = get_custom_models_conf(models_module, 'export_data')

    # build standard models' columns
    columns_for_models = {
        m.__name__.lower(): get_field_names_for_csv(m)
        for m in [Player, Group, Subsession, Participant, Session]
    }

    # build custom models' columns
    columns_for_custom_models = cls.custom_columns_builder(custom_models_conf)

    custom_models_links = get_links_between_std_and_custom_models(
        custom_models_conf, for_action='export_data')

    # lowercased standard model name -> lowercased names of the custom models linked to it
    std_models_select_related = defaultdict(list)
    for smodel_class, cmodels_links in custom_models_links.items():
        smodel_lwr = smodel_class.__name__.lower()
        for cmodel_class, _ in cmodels_links:
            std_models_select_related[smodel_lwr].append(
                cmodel_class.__name__.lower())

    # create lists of IDs that will be used for the export
    participant_ids = set(
        Player.objects.values_list('participant_id', flat=True))
    session_ids = set(
        Subsession.objects.values_list('session_id', flat=True))

    # create standard model querysets
    qs_participant = Participant.objects.filter(id__in=participant_ids)
    qs_player = Player.objects.filter(session_id__in=session_ids)\
        .order_by('id')\
        .select_related(*std_models_select_related.get('player', [])).values()
    qs_group = Group.objects.filter(session_id__in=session_ids)\
        .select_related(*std_models_select_related.get('group', []))
    qs_subsession = Subsession.objects.filter(session_id__in=session_ids)\
        .select_related(*std_models_select_related.get('subsession', []))

    # Load all participants with a single query. Calling `qs_participant.get()` for every player
    # row would issue one database query per player.
    participants_by_id = {p.id: p for p in qs_participant}

    # create prefetch dictionaries from querysets that map IDs to subsets of the data
    # NOTE(review): the rows are indexed like dicts below, so `_rows_per_key_from_queryset`
    # presumably yields dict rows and tolerates missing keys -- confirm against the helper.

    # stores IDs per standard oTree model to be used for custom data prefetching
    prefetch_filter_ids_for_custom_models = {}

    # session ID -> subsession rows for this session
    prefetch_subsess = _rows_per_key_from_queryset(qs_subsession, 'session_id')
    prefetch_filter_ids_for_custom_models['subsession'] = \
        _set_of_ids_from_rows_per_key(prefetch_subsess, 'id')

    # subsession ID -> group rows for this subsession
    prefetch_group = _rows_per_key_from_queryset(qs_group, 'subsession_id')
    prefetch_filter_ids_for_custom_models['group'] = \
        _set_of_ids_from_rows_per_key(prefetch_group, 'id')

    # group ID -> player rows for this group
    prefetch_player = _rows_per_key_from_queryset(qs_player, 'group_id')
    prefetch_filter_ids_for_custom_models['player'] = \
        _set_of_ids_from_rows_per_key(prefetch_player, 'id')

    # prefetch dict for custom data models:
    # standard oTree model name -> custom model name -> data rows
    prefetch_custom = defaultdict(dict)
    for smodel, cmodel_links in custom_models_links.items():  # per oTree std. model
        smodel_name_lwr = smodel.__name__.lower()

        # IDs that occur for that model
        filter_ids = prefetch_filter_ids_for_custom_models[smodel_name_lwr]

        # iterate per custom model
        for model, link_field_name in cmodel_links:
            # prefetch custom model objects that are linked to these oTree std. model IDs
            filter_kwargs = {link_field_name + '__in': filter_ids}
            custom_qs = model.objects.filter(**filter_kwargs).values()

            # store to the dict
            prefetch_custom[smodel_name_lwr][model.__name__.lower()] = \
                _rows_per_key_from_queryset(custom_qs, link_field_name)

    # build the final nested data structure
    output_nested = []
    # model name -> columns, in the order in which the models are first encountered
    ordered_columns_per_model = OrderedDict()

    def attach_custom_rows(out_row, std_model_name, std_row_id):
        """Nest the custom model rows linked to one standard model row into `out_row`."""
        for cmodel_name, cmodel_rows in prefetch_custom.get(std_model_name, {}).items():
            cmodel_cols = columns_for_custom_models[cmodel_name]
            if cmodel_name not in ordered_columns_per_model:
                ordered_columns_per_model[cmodel_name] = cmodel_cols

            out_row['__' + cmodel_name] = [
                _odict_from_row(cmodel_row, cmodel_cols)
                for cmodel_row in cmodel_rows[std_row_id]
            ]

    # the column lists do not change between rows, so they are built only once
    sess_cols = columns_for_models['session']
    subsess_cols = columns_for_models['subsession']
    grp_cols = columns_for_models['group']
    player_cols = columns_for_models['player'] + ['participant_id']
    participant_cols = columns_for_models['participant']

    # 1. each session
    for sess in Session.objects.filter(id__in=session_ids).values():
        ordered_columns_per_model.setdefault('session', sess_cols)
        out_sess = _odict_from_row(sess, sess_cols)

        # 1.1. each subsession in the session
        out_sess['__subsession'] = []
        for subsess in prefetch_subsess[sess['id']]:
            ordered_columns_per_model.setdefault('subsession', subsess_cols)
            out_subsess = _odict_from_row(subsess, subsess_cols)

            # 1.1.1. each possible custom models connected to this subsession
            attach_custom_rows(out_subsess, 'subsession', subsess['id'])

            # 1.1.2. each group in this subsession
            out_subsess['__group'] = []
            for grp in prefetch_group[subsess['id']]:
                ordered_columns_per_model.setdefault('group', grp_cols)
                out_grp = _odict_from_row(grp, grp_cols)

                # 1.1.2.1. each possible custom models connected to this group
                attach_custom_rows(out_grp, 'group', grp['id'])

                # 1.1.2.2. each player in this group
                out_grp['__player'] = []
                for player in prefetch_player[grp['id']]:
                    # because player.payoff is a property
                    player['payoff'] = player['_payoff']

                    ordered_columns_per_model.setdefault('player', player_cols)
                    out_player = _odict_from_row(player, player_cols)

                    # 1.1.2.2.1. participant object connected to this player
                    participant_id = out_player['participant_id']
                    participant_obj = participants_by_id.get(participant_id)
                    if participant_obj is None:
                        # not in the prefetched set: fall back to the direct lookup so that a
                        # missing participant raises the same exception as before
                        participant_obj = qs_participant.get(id=participant_id)
                    out_player['__participant'] = _odict_from_row(
                        participant_obj, participant_cols, is_obj=True)

                    # 1.1.2.2.2. each possible custom models connected to this player
                    attach_custom_rows(out_player, 'player', player['id'])

                    out_grp['__player'].append(out_player)

                out_subsess['__group'].append(out_grp)

            out_sess['__subsession'].append(out_subsess)

        output_nested.append(out_sess)

    # generate column names
    columns_flat = []
    for model_name, model_cols in ordered_columns_per_model.items():
        columns_flat.extend('.'.join((model_name, c)) for c in model_cols)

    if return_columns:
        return output_nested, columns_flat
    return output_nested
def get_dataframe_for_app(cls, app_name):
    """
    Build a dataframe with the data of app `app_name`, including the rows of its custom data models.

    The standard models (session, subsession, group, player, participant) are handed to
    `get_dataframe_from_linked_models` together with the column pairs that link each model to the
    previous one; every value of the resulting dataframe is passed through `sanitize_pdvalue_for_csv`.
    """
    models_module = get_models_module(app_name)

    # standard models of this app
    Player = models_module.Player
    Group = models_module.Group
    Subsession = models_module.Subsession

    # custom model configuration (if any) and its links to the standard models
    custom_models_conf = get_custom_models_conf(models_module,
                                                for_action='export_data')
    links_to_custom_models = get_links_between_std_and_custom_models(
        custom_models_conf, for_action='export_data')

    # column names of the standard models, keyed by lowercased model name
    std_models_colnames = {}
    for model in (Session, Subsession, Group, Player, Participant):
        std_models_colnames[model.__name__.lower()] = get_field_names_for_csv(model)
    # needed to link player rows to participant rows
    std_models_colnames['player'].append('participant_id')

    # column names of the custom models
    custom_models_colnames = cls.custom_columns_builder(custom_models_conf)

    # IDs that restrict the export to participants / sessions that occur in this app
    participant_ids = set(
        Player.objects.values_list('participant_id', flat=True))
    session_ids = set(
        Subsession.objects.values_list('session_id', flat=True))

    qs_session = Session.objects.filter(id__in=session_ids)
    qs_subsession = Subsession.objects.filter(session_id__in=session_ids)
    qs_group = Group.objects.filter(session_id__in=session_ids)
    qs_player = Player.objects.filter(session_id__in=session_ids)
    qs_participant = Participant.objects.filter(id__in=participant_ids)

    # (model, queryset, (left column, right column)) -- the column pair names the two columns
    # that connect the model to the one listed before it
    std_models_querysets = (
        (Session, qs_session, (None, None)),
        (Subsession, qs_subsession, ('session.id', 'subsession.session_id')),
        (Group, qs_group, ('subsession.id', 'group.subsession_id')),
        (Player, qs_player, ('group.id', 'player.group_id')),
        (Participant, qs_participant, ('player.participant_id', 'participant.id')),
    )

    # complete data of this app incl. custom models data
    linked_df = get_dataframe_from_linked_models(std_models_querysets,
                                                 links_to_custom_models,
                                                 std_models_colnames,
                                                 custom_models_colnames)

    # sanitize each value
    return linked_df.applymap(sanitize_pdvalue_for_csv)
def export_marketdata():
    """Build the headers and one body row per player, per group, per round.

    Every row concatenates the session values, the session config metadata,
    the subsession values, the group values, the market values of the group
    (sellers "S1"/"S2" followed by buyers "B1"/"B2"), the player values and
    the player's price dimension list.

    TODO: profit_final, mistake_count, mistake_count_1, mistake_count_8,
        mistake_count_16, role_count_seller, role_count_seller_1,
        role_count_seller_8, role_count_seller_16, role_count_buyer,
        role_count_buyer_1, role_count_buyer_8, role_count_buyer_16
    TODO: add survey data; key: app_label = subsession._meta.app_config.name

    :return: tuple ``(headers, body)``
    """
    maxdim = max(Constants.treatmentdims)

    # ----- header -----
    metadata_fns = ["treatmentorder", "date", "time"]
    session_fns = [
        'code', 'id', 'label', 'experimenter_name', 'time_scheduled',
        'time_started', 'comment', 'is_demo'
    ]
    subsession_fns = get_field_names_for_csv(Subsession)
    group_fns = get_field_names_for_csv(Group)
    market_fns = get_market_headers(maxdim)
    participant_fns = ['participant_id_in_sess', 'participant_code']
    player_fns = get_field_names_for_csv(Player)
    pricedim_fns = ["p" + str(dim) for dim in range(1, maxdim + 1)]

    headers = (session_fns + metadata_fns + subsession_fns + group_fns
               + market_fns + participant_fns + player_fns + pricedim_fns)

    # ----- body -----
    # Complete result data from the database; the sessions are derived from
    # the players and ordered by label.
    qs_results = models.Player.objects \
        .select_related('subsession', 'subsession__session', 'group', 'participant') \
        .prefetch_related('ask_set') \
        .all()
    distinct_sessions = {p.subsession.session for p in qs_results}
    sessions = sorted(distinct_sessions, key=lambda s: s.label)

    body = []
    for session in sessions:
        session_list = list_from_obj(session_fns, session)
        metadata_list = list_from_obj(metadata_fns, session.config)

        # rounds in ascending round number
        rounds = list(session.get_subsessions())
        rounds.sort(key=lambda r: r.round_number)
        for subsession in rounds:
            subsession_list = list_from_obj(subsession_fns, subsession)

            # groups in ascending ID within the subsession
            groups = list(subsession.get_groups())
            groups.sort(key=lambda g: g.id_in_subsession)
            for group in groups:
                group_list = list_from_obj(group_fns, group)

                market_list = []
                for role in ("S1", "S2"):
                    seller = group.get_player_by_role(role)
                    market_list.extend([
                        seller.id_in_group,
                        seller.participant.id_in_session,
                        seller.payoff,
                        seller.ask_total,
                        seller.ask_stdev,
                        seller.numsold,
                    ])

                    # price dims, padded with blanks up to maxdim columns
                    pricedims = seller.get_pricedims()
                    if len(pricedims) == 0:
                        # row not yet populated (checking data mid-stream)
                        market_list.extend([""] * maxdim)
                    else:
                        market_list.extend(
                            pricedims[idx].value if idx < subsession.dims else ""
                            for idx in range(maxdim)
                        )

                for role in ("B1", "B2"):
                    buyer = group.get_player_by_role(role)
                    market_list.extend([
                        buyer.id_in_group,
                        buyer.participant.id_in_session,
                        buyer.payoff,
                        buyer.bid_total,
                        buyer.contract_seller_rolenum,
                    ])

                # players ordered by their participant's ID in the session
                players = list(group.get_players())
                players.sort(key=lambda pl: pl.participant.id_in_session)
                for player in players:
                    participant = player.participant
                    player_list = [participant.id_in_session, participant.code]
                    player_list = player_list + list_from_obj(player_fns, player)
                    pricedim_list = get_pd_list(player, subsession.dims, maxdim)

                    body.append(session_list + metadata_list + subsession_list
                                + group_list + market_list + player_list
                                + pricedim_list)

    return headers, body
def export_combineddata():
    """Collect session, market, participant, player and survey fields in one table.

    Limitations noted by the original author:
    - does not return all Asks (only the latest)
    - does not return response times
    - does not return payment data

    :return: csv headers and body for output; the body holds one row per
        player of every group of every subsession of every session
    """
    maxdim = max(Constants.treatmentdims)

    # only the participant headers of this call are used below
    (_session_fns, _session_fns_d, _group_fns, _group_fns_d,
     participant_fns, participant_fns_d) = get_headers_simple()

    session_fns = ["id"] + get_field_names_for_csv(Session)
    metadata_fns = ["treatmentorder", "date", "time"]
    # this will only get the market subsessions (not survey)
    subsession_fns = get_field_names_for_csv(Subsession)
    group_fns = get_field_names_for_csv(Group)
    market_fns = get_market_headers(maxdim)
    player_fns = get_field_names_for_csv(Player)
    pricedim_fns = ["p" + str(dim) for dim in range(1, maxdim + 1)]
    survey_fns = get_field_names_for_csv(PlayerSurvey)

    headers = (session_fns + metadata_fns + subsession_fns + group_fns
               + market_fns + participant_fns_d + player_fns + pricedim_fns
               + survey_fns)

    body = []
    for session in Session.objects.order_by("pk"):
        session_list = list_from_obj(session_fns, session)
        metadata_list = list_from_obj(metadata_fns, session.config)

        # Original author's note: querying this app's Subsession model is
        # believed to exclude subsessions of other apps, so there is no
        # filter on the app name here.
        for subsession in Subsession.objects.filter(session=session):
            subsession_list = list_from_obj(subsession_fns, subsession)

            for group in subsession.get_groups():
                group_list = list_from_obj(group_fns, group)
                market_list = get_market_row(group, subsession.dims, maxdim)
                row_prefix = (session_list + metadata_list + subsession_list
                              + group_list + market_list)

                for player in group.get_players():
                    participant = player.participant
                    participant_list = list_from_obj(participant_fns, participant)
                    player_list = list_from_obj(player_fns, player)

                    # TODO price dims
                    pricedim_list = get_pd_list(player.get_pricedims(),
                                                subsession.dims, maxdim)

                    # the survey record is matched through the participant code
                    player_survey = PlayerSurvey.objects.get(
                        session=session, participant__code=participant.code)
                    survey_list = list_from_obj(survey_fns, player_survey)

                    body.append(row_prefix + participant_list + player_list
                                + pricedim_list + survey_list)

    return headers, body
def get_rows_for_wide_csv(session_code):
    """Build the rows of the "wide" CSV export for a single session.

    The first row is the header. It is followed by one row per participant of
    the session (ordered by participant ID) holding the participant fields,
    the session fields and the editable session config fields. For every app
    and round, the columns returned by `get_rows_for_wide_csv_round` are
    appended to these rows.

    :param session_code: ``code`` of the session to export
    :return: list of rows (lists); ``[[]]`` if the session has no participants
    """
    cursession = Session.objects.get(code=session_code)
    sessions = [cursession]

    session_config_fields = set()
    for session in sessions:
        session_config_fields.update(
            SessionConfig(session.config).editable_fields())
    session_config_fields = list(session_config_fields)

    participants = Participant.objects.filter(
        session=cursession).order_by('id').values()
    if not participants:
        # 1 empty row
        return [[]]

    session_fields = get_field_names_for_csv(Session)
    participant_fields = get_field_names_for_csv(Participant)
    participant_fields.append('payoff_plus_participation_fee')

    header_row = [
        'participant.{}'.format(fname) for fname in participant_fields
    ]
    header_row += ['session.{}'.format(fname) for fname in session_fields]
    header_row += [
        'session.config.{}'.format(fname) for fname in session_config_fields
    ]
    rows = [header_row]

    for participant in participants:
        session = cursession
        # not a stored column, so it is computed and added to the row dict
        participant['payoff_plus_participation_fee'] = \
            get_payoff_plus_participation_fee(session, participant)
        row = [
            sanitize_for_csv(participant[fname])
            for fname in participant_fields
        ]
        row += [
            sanitize_for_csv(getattr(session, fname))
            for fname in session_fields
        ]
        row += [
            sanitize_for_csv(session.config.get(fname))
            for fname in session_config_fields
        ]
        rows.append(row)

    # heuristic to get the most relevant order of apps
    app_sequences = collections.Counter()
    for session in sessions:
        # we loaded the config earlier
        app_sequence = session.config['app_sequence']
        app_sequences[tuple(app_sequence)] += session.num_participants
    most_common_app_sequence = app_sequences.most_common(1)[0][0]

    apps_not_in_popular_sequence = [
        app for app in settings.INSTALLED_OTREE_APPS
        if app not in most_common_app_sequence
    ]
    order_of_apps = list(
        most_common_app_sequence) + apps_not_in_popular_sequence

    # app name -> highest round number stored for that app's subsessions
    # (apps without any subsession are left out)
    rounds_per_app = OrderedDict()
    for app_name in order_of_apps:
        models_module = get_models_module(app_name)
        agg_dict = models_module.Subsession.objects.all().aggregate(
            Max('round_number'))
        highest_round_number = agg_dict['round_number__max']
        if highest_round_number is not None:
            rounds_per_app[app_name] = highest_round_number

    for app_name, highest_round_number in rounds_per_app.items():
        for round_number in range(1, highest_round_number + 1):
            new_rows = get_rows_for_wide_csv_round(app_name, round_number,
                                                   sessions)
            # zip() stops at the shorter sequence, so rows without a
            # counterpart in `new_rows` are left unchanged. This keeps the
            # tolerance for a shorter `new_rows` that the former bare
            # `except` provided, without swallowing unrelated errors
            # (including KeyboardInterrupt / SystemExit).
            for row, extra_columns in zip(rows, new_rows):
                row.extend(extra_columns)

    return rows
def generate_doc_dict():
    """Build the documentation structure for the models named in `model_names`.

    `model_names` and `models_module` are read from the enclosing scope.
    'Participant' and 'Session' resolve to the models of the same name; any
    other name is looked up on `models_module`.

    :return: ordered dict ``model name -> member name -> entry`` where the
        entry is an ordered dict that may contain
        ``'type'`` (list with a readable data type),
        ``'doc'`` (list of documentation lines) and
        ``'choices'`` (result of `choices_readable`). Members that are
        neither ``id``, a model field nor callable get an empty entry.
    """
    doc_dict = OrderedDict()

    # Django internal field type -> readable name; unknown types are shown as-is
    data_types_readable = {
        'PositiveIntegerField': 'positive integer',
        'IntegerField': 'integer',
        'BooleanField': 'boolean',
        'CharField': 'text',
        'TextField': 'text',
        'FloatField': 'decimal',
        'DecimalField': 'decimal',
        'CurrencyField': 'currency'
    }

    for model_name in model_names:
        if model_name == 'Participant':
            Model = Participant
        elif model_name == 'Session':
            Model = Session
        else:
            Model = getattr(models_module, model_name)

        field_names = set(field.name for field in Model._meta.fields)

        members = get_field_names_for_csv(Model)
        if not members:
            members = list(inspect_field_names(Model))

        model_docs = doc_dict[model_name] = OrderedDict()

        for member_name in members:
            member = getattr(Model, member_name, None)
            entry = model_docs[member_name] = OrderedDict()

            if member_name == 'id':
                entry['type'] = ['positive integer']
                entry['doc'] = ['Unique ID']
            elif member_name in field_names:
                # `_meta.get_field_by_name()` was removed in Django 1.10;
                # `get_field()` returns the same field object for the
                # concrete fields listed in `field_names`.
                member = Model._meta.get_field(member_name)

                internal_type = member.get_internal_type()
                data_type = data_types_readable.get(internal_type,
                                                    internal_type)
                entry['type'] = [data_type]

                # flag error if the model doesn't have a doc attribute,
                # which it should unless the field is a 3rd party field
                doc = getattr(member, 'doc', '[error]') or ''
                entry['doc'] = [
                    line.strip() for line in doc.splitlines()
                    if line.strip()
                ]

                choices = getattr(member, 'choices', None)
                if choices:
                    entry['choices'] = choices_readable(choices)
            elif callable(member):
                # `collections.Callable` was removed in Python 3.10; the
                # builtin `callable()` performs the same check.
                entry['doc'] = [inspect.getdoc(member)]

    return doc_dict